First article supporting material
Crude and age-sex-year adjusted mortality rates and ratios and net survival
Data Loading and Exploration
Loading Packages and uniting databases
Proceed to load the necessary packages.
Code
# --- Bootstrap reticulate con ruta relativa a getwd() ---
suppressPackageStartupMessages(library(reticulate))
# Busca .mamba_root/envs/py311/python.exe desde getwd() hacia padres
find_python_rel <- function(start = getwd(),
rel = file.path(".mamba_root","envs","py311","python.exe")) {
cur <- normalizePath(start, winslash = "/", mustWork = FALSE)
repeat {
cand <- normalizePath(file.path(cur, rel), winslash = "/", mustWork = FALSE)
if (file.exists(cand)) return(cand)
parent <- dirname(cur)
if (identical(parent, cur)) return(NA_character_) # llegó a la raíz
cur <- parent
}
}
py <- find_python_rel()
if (is.na(py)) {
stop("No se encontró Python relativo a getwd() (buscando '.mamba_root/envs/py311/python.exe').\n",
"Directorio actual: ", getwd())
}
# Forzar ese intérprete
Sys.unsetenv(c("RETICULATE_CONDAENV","RETICULATE_PYTHON_FALLBACK"))
Sys.setenv(RETICULATE_PYTHON = py)
use_python(py, required = TRUE)
py_config() # verificación
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#clean enviroment
rm(list = ls()); gc()
file.path(paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))),"data/20241015_out"))
wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
wdpath
envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
envpath
time_before_dedup2<-Sys.time()
#base::load(paste0(wdpath,"data/20241015_out/","3_ndp_2025_05_30.Rdata"))
if (!(Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv"))) {
file.path(paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))),"data/20241015_out"))
wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
wdpath
envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
envpath
base::load(paste0(wdpath,"data/20241015_out/","4_ndp_2025_06_06.Rdata"))
} else {
file.path(paste0(getwd(),"/_input"))
paste0(getwd(),"/_input","/4_ndp_2025_06_06.Rdata")
base::load(paste0(getwd(),"/_input","/4_ndp_2025_06_06.Rdata.enc"))
}
time_before_dedup1<-Sys.time()
password <- Sys.getenv("PASS_PPIO")
system(sprintf("7z x path/to/_input/4_ndp_2025_06_06.Rdata.7z.001 -p'%s'", password))
try(rm("HOSP_filter_pl_filt"))python: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe
libpython: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python311.dll
pythonhome: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311
version: 3.11.5 | packaged by conda-forge | (main, Aug 27 2023, 03:23:48) [MSC v.1936 64 bit (AMD64)]
Architecture: 64bit
numpy: [NOT FOUND]
NOTE: Python version was forced by RETICULATE_PYTHON
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1739268 92.9 2609282 139.4 2609282 139.4
Vcells 3212755 24.6 8388608 64.0 5079521 38.8
[1] "G:/My Drive/Alvacast/SISTRAT 2023//data/20241015_out"
[1] "G:/My Drive/Alvacast/SISTRAT 2023//"
[1] "G:/Mi unidad/Alvacast/SISTRAT 2023/"
[1] 127
Code
#https://github.com/rstudio/renv/issues/544
#renv falls back to copying rather than symlinking, which is evidently very slow in this configuration.
renv::settings$use.cache(FALSE)
#only use explicit dependencies (in DESCRIPTION)
renv::settings$snapshot.type("implicit")
#check if rstools is installed
try(installr::install.Rtools(check_r_update=F))Code
if(quarto::quarto_version()<"1.7.29"){
stop("You need to install a recent quarto version") # la publicada el 28-abr-2025
}
#change repository to CL
local({
r <- getOption("repos")
r["CRAN"] <- "https://cran.dcc.uchile.cl/"
options(repos=r)
})
if(!require(pacman)){install.packages("pacman");require(pacman)}Code
if(!require(pak)){install.packages("pak");require(pak)}Code
pacman::p_unlock(lib.loc = .libPaths()) #para no tener problemas reinstalando paquetesCode
if(Sys.info()["sysname"]=="Windows"){
if (getRversion() != "4.4.1") { stop("Requires R version 4.4.1; Actual: ", getRversion()) }
}
#check docker
check_docker_running <- function() {
# Try running 'docker info' to check if Docker is running
system("docker info", intern = TRUE, ignore.stderr = TRUE)
}
install_docker <- function() {
# Open the Docker Desktop download page in the browser for installation
browseURL("https://www.docker.com/products/docker-desktop")
}
# Main logic
if (inherits(try(check_docker_running(), silent = TRUE), "try-error")) {
liftr::install_docker()
} else {
message("Docker is running.")
}Warning in system(“docker info”, intern = TRUE, ignore.stderr = TRUE): el comando ejecutado ‘docker info’ tiene el estatus 1
Code
#Registrar el font
windowsFonts(`Times New Roman` = windowsFont("TT Times New Roman"))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PACKAGES#######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
unlink("*_cache", recursive=T)
# ----------------------------------------------------------------------
# 2. Use a single pak::pkg_install() call for most CRAN packages
# ----------------------------------------------------------------------
paks <-
c(#"git",
# To connect to github
"gh", #interface for GitHub API from R
#
"gitcreds", # manages Git credentials (usernames, passwords, tokens)
#
"usethis", # simplifies common project setup tasks for R developers
# Package to bring packages in development
"devtools",
# Package administration
"renv",
# To manipulate data
"knitr", "pander", "DT",
# Join
"fuzzyjoin", "RecordLinkage",
# For tables
"tidyverse", "janitor",
# For contingency tables
"kableExtra",
# For connections with python
"reticulate",
# To manipulate big data
"polars", "sqldf",
# To bring big databases
"nanoparquet",
# Interface for R and RStudio in R
"installr", "rmarkdown", "quarto", "yaml", #"rstudioapi",
# Time handling
"clock",
# Combine plots
"ggpubr",
# Parallelized iterative processing
"furrr",
# Work like a tibble with a data.table database
"tidytable",
# Split database into training and testing
"caret",
# Impute missing data
"missRanger", "mice",
# To modularize tasks
"job",
# For PhantomJS install checks
"webshot"
)
# dplyr
# janitor
# reshape2
# tidytable
# arrow
# boot
# broom
# car
# caret
# data.table
# DiagrammeR
# DiagrammeRsvg
# dplyr
# epiR
# epitools
# ggplot2
# glue
# htmlwidgets
# knitr
# lubridate
# naniar
# parallel
# polycor
# pROC
# psych
# readr
# rio
# rsvg
# scales
# stringr
# tableone
# rmarkdown
# biostat3
# codebook
# finalfit
# Hmisc
# kableExtra
# knitr
# devtools
# tidyr
# stringi
# stringr
# muhaz
# sqldf
# compareGroups
# survminer
# lubridate
# ggfortify
# car
# fuzzyjoin
# compareGroups
# caret
# job
# htmltools
# nanoparquet
# ggpubr
# polars
# installr
# clock
# pander
# reshape
# mice
# missRanger
# VIM
# withr
# biostat3
# broom
# glue
# finalfit
# purrr
# sf
# pak::pkg_install(paks)
pak::pak_sitrep()
# pak::sysreqs_check_installed(unique(unlist(paks)))
#pak::lockfile_create(unique(unlist(paks)), "dependencies_duplicates24.lock", dependencies=T)
#pak::lockfile_install("dependencies_duplicates24.lock")
#https://rdrr.io/cran/pak/man/faq.html
#pak::cache_delete()
library(tidytable)Code
library(polars)Warning: package ‘polars’ was built under R version 4.4.3
Code
library(ggplot2)
library(readr)
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
mexhaz, # Flexible parametric hazard regression models for survival analysis
tidyverse, # Collection of packages for data manipulation, visualization, and more (includes dplyr, ggplot2, tidyr, etc.)
janitor, # Simple tools for examining and cleaning dirty data
tableone, # Create "Table 1" summaries for descriptive statistics in medical research
cowplot, # Streamlined plot theme and plot annotations for ggplot2
grid, # Base R package for low-level graphics functions (used for arranging plots)
rio, # Simplifies data import/export with a consistent interface
coin, # Conditional inference procedures for hypothesis testing
kableExtra, # Enhances 'knitr::kable()' for creating complex tables in R Markdown
epitools, # Epidemiological tools for data analysis
relsurv, # Relative survival analysis for population-based cancer studies
survminer, # Survival analysis and visualization based on 'survival' package
biostat3, # Biostatistics functions and datasets for teaching and research
tableone, # (Repeated) Create descriptive summary tables for clinical research
popEpi, # For SMRs and SIR
metafor, # For heterogeneity test /Cochrane Q
parallel, # Parallel computing (for bootstrap)
install = T # Automatically install packages if not already installed
)
# ----------------------------------------------------------------------
# 3. Activate polars code completion (safe to try even if it fails)
# ----------------------------------------------------------------------
try(polars_code_completion_activate())Code
# ----------------------------------------------------------------------
# 4. BPMN from GitHub (not on CRAN, so install via devtools if missing)
# ----------------------------------------------------------------------
if (!requireNamespace("bpmn", quietly = TRUE)) {
devtools::install_github("bergant/bpmn")
}
# ----------------------------------------------------------------------
# 5. PhantomJS Check (use webshot if PhantomJS is missing)
# ----------------------------------------------------------------------
# if (!webshot::is_phantomjs_installed()) {
# webshot::install_phantomjs()
# }
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#FUNCTIONS######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
copiar_nombres <- function(x,row.names=FALSE,col.names=TRUE,dec=",",...) {
if(class(try(dplyr::ungroup(x)))[1]=="tbl_df"){
if(options()$OutDec=="."){
options(OutDec = dec)
write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
options(OutDec = ".")
return(x)
} else {
options(OutDec = ",")
write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
options(OutDec = ",")
return(x)
}
} else {
if(options()$OutDec=="."){
options(OutDec = dec)
write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
options(OutDec = ".")
return(x)
} else {
options(OutDec = ",")
write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
options(OutDec = ",")
return(x)
}
}
}
#WINDOWS do not restrict memory size
if(.Platform$OS.type == "windows") withAutoprint({
memory.size()
memory.size(TRUE)
memory.limit()
})Warning: ‘memory.size()’ is no longer supported
Warning: ‘memory.size()’ is no longer supported
Warning: ‘memory.limit()’ is no longer supported
Code
memory.limit(size=56000)Warning: ‘memory.limit()’ is no longer supported
Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#NAs are replaced with "" in knitr kable
options(knitr.kable.NA = '')
pander::panderOptions('big.mark', ',')
pander::panderOptions('decimal.mark', '.')
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#
#to format rows in bold
format_cells <- function(df, rows ,cols, value = c("italics", "bold", "strikethrough")){
# select the correct markup
# one * for italics, two ** for bold
map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
markup <- map[value]
for (r in rows){
for(c in cols){
# Make sure values are not factors
df[[c]] <- as.character( df[[c]])
# Update formatting
df[r, c] <- ifelse(nchar(df[r, c])==0,"",paste0(markup, gsub(" ", "", df[r, c]), markup))
}
}
return(df)
}
#To produce line breaks in messages and warnings
knitr::knit_hooks$set(
error = function(x, options) {
paste('\n\n<div class="alert alert-danger" style="font-size: small !important;">',
gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
'</div>', sep = '\n')
},
warning = function(x, options) {
paste('\n\n<div class="alert alert-warning" style="font-size: small !important;">',
gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
'</div>', sep = '\n')
},
message = function(x, options) {
paste('<div class="message" style="font-size: small !important;">',
gsub('##', '\n', x),
'</div>', sep = '\n')
}
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sum_dates <- function(x){
cbind.data.frame(
min= as.Date(min(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01"),
p001= as.Date(quantile(unclass(as.Date(x)), .001, na.rm=T), origin = "1970-01-01"),
p005= as.Date(quantile(unclass(as.Date(x)), .005, na.rm=T), origin = "1970-01-01"),
p025= as.Date(quantile(unclass(as.Date(x)), .025, na.rm=T), origin = "1970-01-01"),
p25= as.Date(quantile(unclass(as.Date(x)), .25, na.rm=T), origin = "1970-01-01"),
p50= as.Date(quantile(unclass(as.Date(x)), .5, na.rm=T), origin = "1970-01-01"),
p75= as.Date(quantile(unclass(as.Date(x)), .75, na.rm=T), origin = "1970-01-01"),
p975= as.Date(quantile(unclass(as.Date(x)), .975, na.rm=T), origin = "1970-01-01"),
p995= as.Date(quantile(unclass(as.Date(x)), .995, na.rm=T), origin = "1970-01-01"),
p999= as.Date(quantile(unclass(as.Date(x)), .999, na.rm=T), origin = "1970-01-01"),
max= as.Date(max(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01")
)
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Define the function adapted for Polars
sum_dates_polars <- function(df, date_col) {
# Create the list of quantiles
quantiles <- c(0.001, 0.005, 0.025, 0.25, 0.5, 0.75, 0.975, 0.995, 0.999)
# Create expressions to calculate min and max
expr_list <- list(
pl$col(date_col)$min()$alias("min"),
pl$col(date_col)$max()$alias("max")
)
# Add expressions for quantiles
for (q in quantiles) {
expr_list <- append(expr_list, pl$col(date_col)$quantile(q)$alias(paste0("p", sub("\\.", "", as.character(q)))))
}
# Apply the expressions and return a DataFrame with the results
df$select(expr_list)
}
# Custom function for sampling with a seed
sample_n_with_seed <- function(data, size, seed) {
set.seed(seed)
dplyr::sample_n(data, size)
}
# Function to get the most frequent value
most_frequent <- function(x) {
uniq_vals <- unique(x)
freq_vals <- sapply(uniq_vals, function(val) sum(x == val))
most_freq <- uniq_vals[which(freq_vals == max(freq_vals))]
if (length(most_freq) == 1) {
return(most_freq)
} else {
return(NA)
}
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#CONFIG #######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
options(scipen=2) #display numbers rather scientific number
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Define the function first
#oins these values with semicolons and optionally truncates the result if it exceeds a specified width.
toString2 <- function(x, width = NULL, ...) {
string <- paste(x, collapse = "; ")
if (missing(width) || is.null(width) || width == 0)
return(string)
if (width < 0)
stop("'width' must be positive")
if (nchar(string, type = "w") > width) {
width <- max(6, width)
string <- paste0(substr(string, 1, width - 3), "...")
}
string
}
extract_fit <- function(modelo) {
logLik_value <- modelo$loglik
# n_params <- attr(logLik_value, "df")
# n_obs <- length(modelo$y)
n_params <- modelo$n.par
n_obs <- modelo$n.obs #no sé si mejor n.obs.tot
convergence_code <- modelo$code # Check convergence status (0, 1, or 2)
aic <- 2 * modelo$n.par - 2 * modelo$loglik
bic <- log(modelo$n.obs) * modelo$n.par - 2 * modelo$loglik
data.frame(
LogLikelihood = as.numeric(logLik_value),
NumParameters = n_params,
NumObservations = n_obs,
AIC = aic,
BIC = bic,
convergence = convergence_code
)
}
calculate_smr_orig = function(data) {
data|>
summarise(
Observed = sum(observed),
Expected = sum(expected)
)|>
rowwise()|>
mutate(
SMR = Observed / Expected,
lo = biostat3::poisson.ci(Observed, Expected)[1],
up = biostat3::poisson.ci(Observed, Expected)[2]
)|>
ungroup()
}
calculate_smr = function(data) {
data|>
summarise(
Observed = sum(observed, na.rm=T),
Expected = sum(expected, na.rm=T)
)|>
rowwise()|>
mutate(
SMR = Observed / Expected,
lo = biostat3::poisson.ci(Observed, Expected)[1],
up = biostat3::poisson.ci(Observed, Expected)[2]
)|>
ungroup()
}
calculate_smr_alt <- function(data) {
data|>
summarise(
Observed = sum(observed),
Expected = sum(expected)
)|>
rowwise()|>
mutate(
# Reemplazar Expected == 0 con un valor pequeño
Expected = ifelse(Expected == 0, 1e-5, Expected),
Observed = ifelse(Observed == 0, 1e-5, Observed),
SMR = Observed / Expected,
lo = biostat3::poisson.ci(Observed, Expected)[1],
up = biostat3::poisson.ci(Observed, Expected)[2]
)|>
ungroup()
}
theme_sjPlot_manual <- function() {
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(color = "gray80"),
panel.grid.minor = element_line(color = "gray90")
)
}
summarize_numerical_tt <- function(df, var) {
df <- as_tidytable(df)[!is.na(status)] # drop rows with missing status
# ── 1. summaries by status ──────────────────────────────────────
by_status <- df %>%
tidytable::summarise(
Total = .N,
Mean = sprintf("%.1f", mean(get(var), na.rm = TRUE)),
SD = sprintf("%.1f", sd(get(var), na.rm = TRUE)),
Median = sprintf("%.1f", median(get(var), na.rm = TRUE)),
IQR = sprintf("%.1f", IQR(get(var), na.rm = TRUE)),
Min = sprintf("%.1f", min(get(var), na.rm = TRUE)),
Max = sprintf("%.1f", max(get(var), na.rm = TRUE)),
pres = sprintf("%.1f [%.1f-%.1f]",
median(get(var), na.rm = TRUE),
quantile(get(var), .25, na.rm = TRUE),
quantile(get(var), .75, na.rm = TRUE)),
.by = status
)|> tidytable::mutate(status= as.character(status))
# ── 2. overall (Total) row ──────────────────────────────────────
overall <- df %>%
tidytable::summarise(
Total = .N,
Mean = sprintf("%.1f", mean(get(var), na.rm = TRUE)),
SD = sprintf("%.1f", sd(get(var), na.rm = TRUE)),
Median = sprintf("%.1f", median(get(var), na.rm = TRUE)),
IQR = sprintf("%.1f", IQR(get(var), na.rm = TRUE)),
Min = sprintf("%.1f", min(get(var), na.rm = TRUE)),
Max = sprintf("%.1f", max(get(var), na.rm = TRUE)),
pres = sprintf("%.1f [%.1f-%.1f]",
median(get(var), na.rm = TRUE),
quantile(get(var), .25, na.rm = TRUE),
quantile(get(var), .75, na.rm = TRUE))
) %>%
mutate(status = "Total")
# ── 3. bind and return ──────────────────────────────────────────
bind_rows(by_status, overall) |>
dplyr::mutate(status = factor(status, levels = c("Total", "0", "1"))) |>
arrange(status)
}
summarize_categorical_tt <- function(.data, var) {
var <- rlang::as_name(rlang::ensym(var)) # make sure it’s a plain string
tbl <- as_tidytable(.data) # guarantee tidytable class
## ── counts per status ──────────────────────────────────────────────
tab <- tbl %>%
tidytable::summarise(
n = .N,
.by = c("status", var) # fast group-by in tidytable
) %>%
pivot_wider(
names_from = status,
values_from = n,
values_fill = 0
) %>%
mutate(Total = rowSums(across(where(is.numeric))))
## ── build “count (pct)” strings ────────────────────────────────────
status_cols <- setdiff(names(tab), c(var, "Total"))
status_cols <- sort(status_cols, na.last = TRUE) # ensure "0" then "1", etc.
pct_cols <- c("Total", status_cols)
tab <- tab %>%
tidytable::mutate(
across(
all_of(pct_cols),
\(x) paste0(x, " (", sprintf("%.1f", x / sum(x) * 100), ")"),
.names = "{.col}_pct"
)
)
## ── keep only the pct columns in required order ────────────────────
tab %>%
tidytable::select(all_of(c(var, "Total_pct", paste0(status_cols, "_pct"))))
}
#SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv
# treatment modality
# substance use at treatment entry
# initial treatment outcome
# days in treatment
# age at treatment entry
# year of treatment initiation
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
year_fraction <- function(dates) {
dates <- as.Date(dates) # Ensure input is Date class
years <- as.numeric(format(dates, "%Y"))
day_of_year <- as.numeric(format(dates, "%j"))
# Function to check leap year
is_leap_year <- function(year) {
(year %% 4 == 0 & year %% 100 != 0) | (year %% 400 == 0)
}
days_in_year <- ifelse(is_leap_year(years), 366, 365)
years + (day_of_year - 1) / days_in_year
}
cmr_ci_phi <- function(observed, pyrs,
phi = 1, # escala de sobre-dispersión
conf.level = 0.95,
factor = 1000) {
# Validación básica
if (any(is.na(observed) | is.na(pyrs)))
stop("NAs no permitidos; limpia antes los datos.")
if (any(observed < 0 | pyrs <= 0))
stop("observed ≥ 0 y pyrs > 0 son obligatorios.")
z <- qnorm(1 - (1 - conf.level) / 2)
# CMR
cmr <- observed / pyrs * factor
# IC
ci_low <- ci_high <- numeric(length(observed))
zeros <- observed == 0
if (any(zeros)) {
# Límite gamma exacto cuando hay cero muertes
ci_low[zeros] <- 0
ci_high[zeros] <- qgamma(conf.level, shape = 1,
rate = pyrs[zeros]) * factor
}
if (any(!zeros)) {
# Log-normal con sobre-dispersión
se_log <- sqrt(phi / observed[!zeros])
ci_low[!zeros] <- cmr[!zeros] * exp(-z * se_log)
ci_high[!zeros] <- cmr[!zeros] * exp( z * se_log)
}
data.frame(CMR = cmr,
CI_low = ci_low,
CI_high = ci_high)
}
invisible("using sir output")
sir_ci_phi_improved <- function(sir_obj, phi, conf.level = 0.95) {
#Método log-normal, the best, dont overestimate, or subestimate variance
# extract totals
total_obs <- sir_obj$observed
total_exp <- sir_obj$expected
# Calculate SEs
theta <- total_obs / total_exp
# Normal approximation, n>20
# Corrected SEs (McCullagh & Nelder, 1989)
# “For ratios of Poisson means (such as SIR or CMR), the appropriate approach is to use multinomial or binomial models conditioned on the total observed.”
# Breslow NE, Day NE. Statistical Methods in Cancer Research, Vol. II (IARC, 1987), §2.2. – Derives the same SE formula and recommends inflating by φ in the presence of heterogeneity.
z <- qnorm(1 - (1 - conf.level)/2)
se_log <- sqrt(phi / total_obs) # Valid formula
# ICs
lci <- theta * exp(-z * se_log)
uci <- theta * exp(z * se_log)
data.frame(
SIR = theta,
CI_low = lci,
CI_high = uci,
phi_used = phi
)
}
invisible("using glmdf")
sir_ci_phi_improved2 <- function(observed, expected, phi = 1, conf.level = 0.95) {
z <- qnorm(1 - (1 - conf.level)/2)
theta <- observed / expected
# SE en escala logarítmica con ajuste φ
se_log <- sqrt(phi / observed)
# ICs multiplicativos (correctos para ratios)
lci <- theta * exp(-z * se_log)
uci <- theta * exp(z * se_log)
list(
SIR = theta,
CI_low = lci,
CI_high = uci
)
}
#We estimated the dispersion parameter (φ) for each subgroup by fitting an intercept-only quasi-Poisson model. This method assumes no major unmeasured heterogeneity within subgroups. While this may slightly overestimate dispersion due to residual confounding, it yields conservative confidence intervals that maintain the nominal coverage (Breslow and Day, 1987)
sir_cmr_subgroup <- function(df, group_var, phi = NULL) {
sir_ci_phi_row <- function(observed, expected, phi) {
sir_ci_phi_improved(
list(observed = observed, expected = expected),
phi
)
}
# 1. Compute summary statistics by group
group_stats <- df %>%
dplyr::group_by(dplyr::across(dplyr::all_of(group_var))) %>%
dplyr::summarise(
observed = sum(from0to1, na.rm = TRUE),
pyrs = sum(pyrs, na.rm = TRUE),
expected = sum(expected, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::mutate(
sir = observed / expected,
ear = (observed - expected) / pyrs * 1000
)
# 2. Compute dispersion parameter (phi) for each group if not provided
if (is.null(phi)) {
phi_vals <- df %>%
dplyr::group_by(dplyr::across(dplyr::all_of(group_var))) %>%
dplyr::group_modify(~ {
# Skip groups with no events
if (sum(.x$from0to1, na.rm = TRUE) == 0) {
return(data.frame(phi = NA_real_))
}
# Fit quasi-Poisson model
mod <- tryCatch(
stats::glm(
from0to1 ~ 1,
family = quasipoisson,
offset = log(expected),
data = .x
),
error = function(e) NULL
)
if (is.null(mod)) return(data.frame(phi = NA_real_))
data.frame(phi = summary(mod)$dispersion)
}) %>%
dplyr::ungroup()
group_stats <- dplyr::left_join(group_stats, phi_vals, by = group_var)
} else {
group_stats$phi <- phi
}
# 3. Compute confidence intervals
result <- group_stats %>%
dplyr::mutate(
# Compute CMR with CI
cmr_ci = purrr::pmap(
list(observed, pyrs, phi),
function(obs, py, p) {
cmr_ci_phi(obs, py, p)
}
),
# Compute SIR with CI
sir_ci = purrr::pmap(
list(observed, expected, phi),
function(obs, exp, p) {
sir_ci_phi_improved2(obs, exp, p)
}
)
) %>%
dplyr::mutate(
# Format CMR values
CMR_1000 = purrr::map_chr(cmr_ci, ~ sprintf(
"%.1f (%.1f-%.1f)", .x$CMR, .x$CI_low, .x$CI_high
)),
# Format SIR values
SMR = purrr::map_chr(sir_ci, ~ sprintf(
"%.6f (%.6f-%.6f)", .x$SIR, .x$CI_low, .x$CI_high
))
)
# 4. Final formatting - AHORA INCLUYE PHI
result %>%
dplyr::mutate(
expected = round(expected, 0),
pyrs = round(pyrs, 0),
EAR = sprintf("%.2f", ear),
# Formatear phi con 3 decimales
phi_fmt = phi
) %>%
dplyr::select(
dplyr::all_of(group_var),
observed,
pyrs,
CMR_1000,
expected,
SMR,
EAR,
phi = phi_fmt # Nueva columna con phi formateado
)
}
#2025-06-16
#For indirect SMRs (log(expected))
extract_phi <- function(df) {
df_glm <- df %>%
left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
mutate(expected = pyrs * haz)
# Modelo con variables de estratificación
model_poisson <- glm(
from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
family = poisson,
offset = log(expected),
data = df_glm
)
# Cálculo robusto de φ
pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
df_residual <- df.residual(model_poisson)
dispersion_index <- pearson_chisq / df_residual
return(dispersion_index)
}
#etract phi for a directly standardized mortality rates
extract_phi_dsr <- function(df) {
df_glm <- df %>%
left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
mutate(expected = pyrs * haz)
# model with stratification variables
model_poisson <- glm(
from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
family = poisson,
offset = log(pyrs),
data = df_glm
)
# Cálculo robusto de φ
pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
df_residual <- df.residual(model_poisson)
dispersion_index <- pearson_chisq / df_residual
return(dispersion_index)
}
#Fay & Feuer (1997). Confidence intervals for directly standardized rates: a
#method based on the gamma distribution. Stat Med 16:791-801.
dsr_format <- function(rate, se, phi = 1, factor = 1e3, digits = 2, conf = 0.95) {
z <- qnorm(1 - (1 - conf)/2)
sprintf(paste0("%.", digits, "f (%.", digits, "f–%.", digits, "f)"),
rate*factor,
pmax(0, (rate - z*se*sqrt(phi))*factor),
(rate + z*se*sqrt(phi))*factor)
}
dsr_format_corr <- function(rate, se, phi = 1,
factor = 1e4, # multiplica la tasa (p. ej. ×100 000)
digits = 2, # decimales a mostrar
conf = 0.95) { # nivel de confianza
z <- qnorm(1 - (1 - conf) / 2)
se <- se * sqrt(phi) # sobredispersión
se_log <- ifelse(rate > 0, se / rate, NA)
L <- rate * exp(-z * se_log)
U <- rate * exp( z * se_log)
sprintf(paste0("%.", digits, "f (%.", digits, "f–%.", digits, "f)"),
rate * factor, L * factor, U * factor)
}
extract_spline_data <- function(x) {
if (is.null(x$spline.seq.A)) stop('No splines found.')
plotdim <- as.numeric(c(!is.null(x$spline.seq.A),
!is.null(x$spline.seq.B),
!is.null(x$spline.seq.C)))
splines <- c('spline.seq.A', 'spline.seq.B', 'spline.seq.C')[1:sum(plotdim)]
ests <- gsub("seq", "est", splines)
library(tidyr)
library(dplyr)
all_data <- lapply(seq_along(splines), function(i) {
spline_name <- splines[i]
est_name <- ests[i]
# Extract spline sequence and estimates
spline_seq <- x[[spline_name]]
est_df <- x[[est_name]]
# Check if est_df has a factor/level column (usually first column)
if (ncol(est_df) >= 4) {
# Assuming first column is level, next columns are estimate, lower CI, upper CI
df <- data.frame(
spline = x$spline[i],
spline_value = spline_seq,
level = est_df[,1],
estimate = est_df[,2],
lower_ci = est_df[,3],
upper_ci = est_df[,4]
)
} else {
# If no levels, just estimates and CIs
df <- data.frame(
spline = x$spline[i],
spline_value = spline_seq,
estimate = est_df[,1],
lower_ci = est_df[,2],
upper_ci = est_df[,3]
)
}
return(df)
})
# Combine all spline data into one data frame
combined_df <- bind_rows(all_data)
return(combined_df)
}
hetero <- function(rrr1, lcl1, ucl1, rrr2, lcl2, ucl2) {
# Calcular varianzas en escala logarítmica
z <- qnorm(0.975)
var1 <- ((log(ucl1) - log(rrr1)) / z)^2
var2 <- ((log(ucl2) - log(rrr2)) / z)^2
# Estimación agrupada
log_rrr1 <- log(rrr1)
log_rrr2 <- log(rrr2)
pooled_log <- (log_rrr1/var1 + log_rrr2/var2) / (1/var1 + 1/var2)
pooled <- exp(pooled_log)
# Cochran's Q
q_val <- ((log_rrr1 - pooled_log)^2)/var1 + ((log_rrr2 - pooled_log)^2)/var2
p_val <- pchisq(q_val, df = 1, lower.tail = FALSE)
# Test de interacción de Altman
d_log <- log_rrr1 - log_rrr2
se1 <- (log(ucl1) - log(lcl1)) / (2*z)
se2 <- (log(ucl2) - log(lcl2)) / (2*z)
se_d <- sqrt(se1^2 + se2^2)
z_test <- d_log / se_d
p_inter <- 2 * pnorm(-abs(z_test))
# Intervalos de confianza
ci_diff <- c(d_log - z*se_d, d_log + z*se_d)
ci_ratio <- exp(ci_diff)
# Salida con formato idéntico al de Stata
cat("First RRR (95% CI): ", sprintf("%4.2f (%4.2f, %4.2f)\n", rrr1, lcl1, ucl1))
cat("Second RRR (95% CI): ", sprintf("%4.2f (%4.2f, %4.2f)\n\n", rrr2, lcl2, ucl2))
cat("Pooled RRR =", sprintf("%4.2f\n", pooled))
cat("Cochran's Q =", sprintf("%4.2f", q_val), " p-value =", sprintf("%5.4f\n\n", p_val))
cat("Altman test for interaction\n\n")
cat("Diff in log RRRs:", sprintf("%5.3f\n", d_log))
cat("95% CI for diff: (", sprintf("%4.3f, %4.3f)\n", ci_diff[1], ci_diff[2]))
cat("Test of interaction: z =", sprintf("%5.3f", z_test), " p =", sprintf("%5.4f\n", p_inter))
cat("Ratio of estimates, 95% CI:", sprintf("%4.2f (%4.2f, %4.2f)\n",
exp(d_log), ci_ratio[1], ci_ratio[2]))
}
pairwise_smr_test <- function(smrs, lowers, uppers, alpha = 0.05) {
# Validate inputs
stopifnot(
length(smrs) == length(lowers),
length(smrs) == length(uppers),
alpha > 0, alpha < 1
)
# Calculate SEs from 95% CIs (FIXED confidence level)
# Original data uses 95% CIs -> use z = 1.96 regardless of 'alpha'
z_ci <- qnorm(0.975) # 1.96 for 95% CI
ses <- (uppers - lowers) / (2 * z_ci)
# Generate pairwise comparisons
n <- length(smrs)
group_names <- paste("Group", 1:n)
comparisons <- combn(n, 2, simplify = FALSE)
# Calculate differences, SEs, z-scores, p-values
results <- lapply(comparisons, function(pair) {
i <- pair[1]
j <- pair[2]
data.frame(
group1 = group_names[i],
group2 = group_names[j],
smr1 = smrs[i],
smr2 = smrs[j],
difference = smrs[i] - smrs[j],
se_diff = sqrt(ses[i]^2 + ses[j]^2)
)
}) |> do.call(what = rbind)
# Add z-scores and p-values
results$z <- abs(results$difference) / results$se_diff
results$p_unadj <- 2 * pnorm(-results$z)
# Apply Holm-Bonferroni correction
results$p_holm <- p.adjust(results$p_unadj, method = "holm")
# Significance labels
results$significance <- cut(
results$p_holm,
breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
labels = c("***", "**", "*", "NS")
)
return(results)
}Error in contrib.url(repos, "source") :
trying to use CRAN without setting a mirror
* pak version:
- 0.8.0.1
* Version information:
- pak platform: x86_64-w64-mingw32 (current: x86_64-w64-mingw32, compatible)
- pak repository: - (local install?)
* Optional packages installed:
- pillar
* Library path:
- G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32
- C:/Program Files/R/R-4.4.1/library
* pak is installed at G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32/pak.
* Dependency versions:
- callr 3.7.6
- cli 3.6.2
- curl 5.2.1
- desc 1.4.3
- filelock 1.0.3
- jsonlite 1.8.8
- lpSolve 5.6.23.9000
- pkgbuild 1.4.4
- pkgcache 2.2.2.9000
- pkgdepends 0.7.2.9000
- pkgsearch 3.1.3.9000
- processx 3.8.4
- ps 1.7.6
- R6 2.5.1
- zip 2.3.1
* Dependencies can be loaded
> memory.size()
[1] Inf
> memory.size(TRUE)
[1] Inf
> memory.limit()
[1] Inf
[1] Inf
Loads and cleans databases of substance use disorder treatments and mortality in Chile, generating adjusted mortality rates and standardized mortality ratios. Additionally, calculates net survival using survival analysis techniques and compares observed mortality with that expected according to age, year, and sex.
Import & format database
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel<-
SISTRAT23_c1_2010_2022_df_prev1q[, c("rn",
"hash_key",
"adm_age_rec2",#"adm_yr_rec",
"birth_date_rec",#"birth_date_rec_imp",
"adm_date_rec2",#"adm_date_rec",
"adm_date_num_rec2",#"adm_date_rec_num",
"TABLE",#"TABLE_rec",
"dit_rec6",#"dit_rec",
"disch_date_num_rec6",#"disch_date_num",
"disch_date_rec6",
"tr_compliance_rec3",#motivo_de_egreso",
"primary_sub",#"sustancia_principal",
"second_sub1",#"sustancia_principal",
"second_sub2",#"sustancia_principal",
"second_sub3",#"sustancia_principal",
"sub_dep_icd10_status",#"diagnostico_trs_consumo_sustancia",
"sex_rec", #"sexo",
"municipallity_res_cutpre18",#"comuna_residencia",
"region_del_centro",#
"evaluacion_del_proceso_terapeutico",
"edad_inicio_consumo",
"ed_attainment",
"plan_type"# treatment modality
)]We counted the amount of duplicate records in terms of HASH and admission age, and then we discard.
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel|>
janitor::get_dupes(hash_key, adm_age_rec2)|> nrow()
rows_with_dupes<-SISTRAT23_c1_2010_2022_df_prev1q_sel|>
janitor::get_dupes(hash_key, adm_age_rec2)|>
pull(rn)
SISTRAT23_c1_2010_2022_df_prev1q_sel2<-SISTRAT23_c1_2010_2022_df_prev1q_sel|>
(\(df) {
nrow(df)->>before_disc_dup_hash_age_adm_nrow
cat(paste0("1.Number of cases before discarding duplicates in admission age and hash key: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("1.Number of patients before discarding duplicates in admission age and hash key: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
cat(paste0("Records with unavailable missing days in treatment (eg., currently in treatment): ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |>
filter(is.na(years_in_tr)) |> nrow(), "\n"));
cat(paste0("Records with negative days in treatment: ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |>
filter(years_in_tr<0)|> nrow(), "\n")) ;
cat(paste0("Records with more than 3 years in treatment: ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |>
filter(years_in_tr>3)|> nrow(), "\n"))
df
})()|>
mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year"))|>
filter(!is.na(years_in_tr)|years_in_tr>=0|years_in_tr<=3)|>
group_by(hash_key, adm_age_rec2)|>
slice_max(dit_rec6)|>
ungroup()|>
(\(df) {
nrow(df)->>after_disc_dup_hash_age_adm_nrow
cat(paste0("1.Number of cases after discarding duplicates in admission age and hash key and validating days in treatment: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("1.Number of patients after discarding duplicates in admission age and hash key and validating days in treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
df
})()
cat("Discarded rows\n")
before_disc_dup_hash_age_adm_nrow-after_disc_dup_hash_age_adm_nrow #before 18-06 there was 150,019 , p= 106,283 as a result in this step[1] 54
1.Number of cases before discarding duplicates in admission age and hash key: 150,046
1.Number of patients before discarding duplicates in admission age and hash key: 106,283
Records with unavailable missing days in treatment (eg., currently in treatment): 4007
Records with negative days in treatment: 8
Records with more than 3 years in treatment: 1039
1.Number of cases after discarding duplicates in admission age and hash key and validating days in treatment: 146,012
1.Number of patients after discarding duplicates in admission age and hash key and validating days in treatment: 103,612
Discarded rows
[1] 4034
At this date, i only have data until 2020. I also joined mortlaity database with SENDA treatments. We selected records of treatments between 2010 and 2020, of patients admitted between 18 and 64 years old.
Code
cat("Make the death date\n")
mortality$death_date <-
as.Date(paste0(mortality$ano_def, "-",
sprintf("%02.0f",mortality$mes_def), "-",
mortality$dia_def))
cat("Maximum death date available:\n")
max(mortality$death_date, na.rm=T)
#[1] "2020-12-31"
SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel2 |>
tidytable::left_join(mortality[,c("hashkey", "death_date")], by=c("hash_key"="hashkey"), multiple="first") |>
tidytable::mutate(status=ifelse(is.na(death_date), 0, 1))
SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv <- as_tidytable(SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv)
SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv|>
(\(df) {
cat(paste0("3a. Before discarding cases, cases, 2010-2019, first treatment: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("3a. Before discarding cases, patients, 2010-2019, first treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df)->> before_df3a_nrow
df
})() |>
tidytable::arrange(hash_key, adm_age_rec2)|>
#treatments between 2010 and 2020
tidytable::filter(adm_date_rec2>="2010-01-01", adm_date_rec2<"2020-12-31")|>
tidytable::group_by(hash_key) |>
tidytable::mutate(tto= tidytable::row_number())|>
tidytable::slice_min(tto)|>
tidytable::ungroup()|>
#admission ages between 18-65
tidytable::filter(adm_age_rec2>=18, adm_age_rec2<65)|>
tidytable::mutate(post_ttos=ifelse(tto>1, 1, 0))|>
(\(df) {
cat(paste0("3a.Number of cases, 2010-2019, first treatment: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("3a.Number of patients, 2010-2019, first treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df)->> after_df3a_nrow
df
})()
cat("Discarded records:\n")
before_df3a_nrow-after_df3a_nrowMake the death date
Maximum death date available:
[1] "2020-12-31"
3a. Before discarding cases, cases, 2010-2019, first treatment: 146,014
3a. Before discarding cases, patients, 2010-2019, first treatment: 103,612
3a.Number of cases, 2010-2019, first treatment: 88,774
3a.Number of patients, 2010-2019, first treatment: 88,774
Discarded records:
[1] 57240
We construct the database of sensitivity analyses, count previous treatments, but i get the last treatment(3b)
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv|>
(\(df) {
cat(paste0("3b. Before discarding cases, cases, 2010-2019, last treatment: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("3b. Before discarding cases, patients, 2010-2019, last treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df)->> before_df3b_nrow
df
})() |>
#tidytable::filter(TABLE<2021)|>
tidytable::arrange(hash_key, adm_age_rec2)|>
#treatments between 2010 and 2020
tidytable::filter(adm_date_rec2>="2010-01-01", adm_date_rec2<"2020-12-31")|>
tidytable::group_by(hash_key) |>
tidytable::mutate(tto= tidytable::row_number())|>
tidytable::slice_max(tto)|>
tidytable::ungroup()|>
#admission ages between 18-65 (85,763 - 84,502)= 84.897
tidytable::filter(adm_age_rec2>=18, adm_age_rec2<65)|>
tidytable::mutate(prev_ttos=ifelse(tto>1, 1, 0))|>
(\(df) {
cat(paste0("3b.Number of cases, 2010-2019, last (LVCF) treatment: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("3b.Number of patients, 2010-2019, last (LVCF) treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df)->> after_df3b_nrow
df
})()
cat("Discarded records:\n")
before_df3b_nrow-after_df3b_nrow3b. Before discarding cases, cases, 2010-2019, last treatment: 146,014
3b. Before discarding cases, patients, 2010-2019, last treatment: 103,612
3b.Number of cases, 2010-2019, last (LVCF) treatment: 88,725
3b.Number of patients, 2010-2019, last (LVCF) treatment: 88,725
Discarded records:
[1] 57289
Discarded ongoing treatments (truncated, death or currently in treatment, o referrals to teratments outside SENDA network).
Code
days_years<- 365.2425
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv|>
(\(df) {
nrow(df)->>df4a_nrow_pre
cat(paste0("Discarded (death, no tr. compliance), cases(4a): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
cat(paste0("Discarded (death, no tr. compliance), patients(4a): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
df
})()|>
tidytable::filter(!is.na(tr_compliance_rec3) & tr_compliance_rec3!="death" & !grepl("truncated|currently|referral", tr_compliance_rec3))|>
# calculamos la edad al egreso
tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
tidytable::mutate(timesurv= tidytable::case_when(
status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
tidytable::mutate(death_date_rec= tidytable::case_when(
status==1~ death_date,
status==0~ as.Date("2020-12-31")))|>
tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|>
(\(df) {
cat(paste0("4a.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("4a.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df) ->>df4a_nrow_post
df
})()
cat("Discarded records:\n")
df4a_nrow_pre-df4a_nrow_post
cat("Records were excluded for patients who had not yet been discharged")
paste0(round(((df4a_nrow_pre-df4a_nrow_post)/222945)*100,1),"%")Discarded (death, no tr. compliance), cases(4a): 14,304
Discarded (death, no tr. compliance), patients(4a): 14,304
4a.Number of cases: 74,470
4a.Number of patients: 74,470
Discarded records:
[1] 14304
Records were excluded for patients who had not yet been discharged[1] "6.4%"
We replicated for thae database of the last recorded treatments.
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv|>
(\(df) {
cat(paste0("Discarded (death, no tr. compliance), cases(4b): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
cat(paste0("Discarded (death, no tr. compliance), patients(4b): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
nrow(df) ->>df4b_nrow_pre
df
})()|>
tidytable::filter(!is.na(tr_compliance_rec3) & tr_compliance_rec3!="death" & !grepl("truncated|currently|referral", tr_compliance_rec3))|> # calculamos la edad al egreso
tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
tidytable::mutate(death_date_rec= tidytable::case_when(
status==1~ death_date,
status==0~ as.Date("2020-12-31")))|>
tidytable::mutate(timesurv= tidytable::case_when(
status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|>
(\(df) {
cat(paste0("4b.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("4b.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df) ->>df4b_nrow_post
df
})()
cat("Discarded records:\n")
df4b_nrow_pre-df4b_nrow_post
cat("Records were excluded for patients who had not yet been discharged")
paste0(round(((df4b_nrow_pre-df4b_nrow_post)/222945)*100,1),"%")Discarded (death, no tr. compliance), cases(4b): 11,340
Discarded (death, no tr. compliance), patients(4b): 11,340
4b.Number of cases: 77,385
4b.Number of patients: 77,385
Discarded records:
[1] 11340
Records were excluded for patients who had not yet been discharged[1] "5.1%"
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv|>
(\(df) {
nrow(df)->>df4c_nrow_pre
cat(paste0("Discarded (death, no tr. compliance), cases(4c): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
cat(paste0("Discarded (death, no tr. compliance), patients(4c): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
df
})()|>
# calculamos la edad al egreso
tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
tidytable::mutate(timesurv= tidytable::case_when(
status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
tidytable::mutate(death_date_rec= tidytable::case_when(
status==1~ death_date,
status==0~ as.Date("2020-12-31")))|>
tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|>
(\(df) {
cat(paste0("4c.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
cat(paste0("4c.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
nrow(df) ->>df4c_nrow_post
df
})()
cat("Discarded records:\n")
df4c_nrow_pre-df4c_nrow_post
cat("Records were excluded for patients who had not yet been discharged")
paste0(round(((df4c_nrow_pre-df4c_nrow_post)/222945)*100,1),"%")Discarded (death, no tr. compliance), cases(4c): 14,304
Discarded (death, no tr. compliance), patients(4c): 14,304
4c.Number of cases: 88,774
4c.Number of patients: 88,774
Discarded records:
[1] 0
Records were excluded for patients who had not yet been discharged[1] "0%"
Added admission age and year
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2)
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_age_rec2)
#add admission year
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_date_rec2))
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_date_rec2))
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_age_rec2)
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2))Bring & format mortality tables
Import and format tables for compatibility
Code
# Importar los archivos desde la ruta base
base_path <- paste0(getwd(),"/_input/")
mltper_1x1 <- try(rio::import(paste0(base_path, "mltper_1x1.txt")))
fltper_1x1 <- try(rio::import(paste0(base_path, "fltper_1x1.txt")))
mltper_5x1 <- try(rio::import(paste0(base_path, "mltper_5x1.txt")))
fltper_5x1 <- try(rio::import(paste0(base_path, "fltper_5x1.txt")))
mltper_1x10 <- try(rio::import(paste0(base_path, "mltper_1x10.txt")))
fltper_1x10 <- try(rio::import(paste0(base_path, "fltper_1x10.txt")))
mltper_5x10 <- try(rio::import(paste0(base_path, "mltper_5x10.txt")))
fltper_5x10 <- try(rio::import(paste0(base_path, "fltper_5x10.txt")))
#filtro para obtener las tasas de los últimos 10 años
mltper_1x10_filt<-mltper_1x10[mltper_1x10$Year=="2010-2019",]
fltper_1x10_filt<-fltper_1x10[fltper_1x10$Year=="2010-2019",]
mltper_5x10_filt<-mltper_5x10[mltper_5x10$Year=="2010-2019",]
fltper_5x10_filt<-fltper_5x10[fltper_5x10$Year=="2010-2019",]
mltper_1x1_filt<-mltper_1x1[between(mltper_1x1$Year,2010,2020),]
fltper_1x1_filt<-fltper_1x1[between(fltper_1x1$Year,2010,2020),]
mltper_5x1_filt<-mltper_5x1[between(mltper_5x1$Year,2010,2020),]
fltper_5x1_filt<-fltper_5x1[between(fltper_5x1$Year,2010,2020),]
#cambiar la edad a entero
mltper_1x10_filt$age_rec <- as.numeric(mltper_1x10_filt$Age)Warning: NAs introducidos por coerción
Code
fltper_1x10_filt$age_rec <- as.numeric(fltper_1x10_filt$Age)Warning: NAs introducidos por coerción
Code
mltper_5x1_filt$age_rec <- as.numeric(mltper_5x1_filt$Age)Warning: NAs introducidos por coerción
Code
fltper_5x1_filt$age_rec <- as.numeric(fltper_5x1_filt$Age)Warning: NAs introducidos por coerción
Code
mltper_1x10_filt$sex <- "male"
fltper_1x10_filt$sex <- "female"
mltper_5x10_filt$sex <- "male"
fltper_5x10_filt$sex <- "female"
mltper_1x1_filt$sex <- "male"
fltper_1x1_filt$sex <- "female"
mltper_5x1_filt$sex <- "male"
fltper_5x1_filt$sex <- "female"
cons_rate_sex_1x10<-
rbind.data.frame(mltper_1x10_filt, fltper_1x10_filt)[,c("age_rec","sex", "qx","mx")]
cons_rate_sex_1x10<-cons_rate_sex_1x10[which(!is.na(cons_rate_sex_1x10$age_rec)),]
cons_rate_sex_1x10$lambda_p <- -log( 1 - cons_rate_sex_1x10$qx ) / 365.241
cons_rate_sex_5x10<-
rbind.data.frame(mltper_5x10_filt, fltper_5x10_filt)[,c("Age","sex", "lx", "qx","mx")]
cons_rate_sex_5x10$lambda_p <- -log( 1 - cons_rate_sex_5x10$qx ) / 365.241
invisible("Capping valores cercanos al 100% muertes")
invisible("No esn ecesario porque not engo esos casos")
cons_rate_sex_5x10<-cons_rate_sex_5x10[which(cons_rate_sex_5x10$Age!="110+"),]
cons_rate_sex_5x1<-
rbind.data.frame(mltper_5x1_filt, fltper_5x1_filt)[,c("Year","Age","sex", "lx","qx","mx")]
cons_rate_sex_5x1$lambda_p <- -log( 1 - cons_rate_sex_5x1$qx ) / 365.241
invisible("Capping valores cercanos al 100% muertes")
invisible("No esn ecesario porque not engo esos casos")
cons_rate_sex_5x1<-cons_rate_sex_5x1[which(cons_rate_sex_5x1$Age!="110+"),]Import mortality tables from National Statistics Institute, along with projections of the population.
Code
tablas_de_mortalidad_de_chile_1992_2050_agrupada <-
readxl::read_excel(paste0(base_path, "tablas-de-mortalidad-de-chile-1992-2050.xlsx"),
sheet = "BD Tablas de Mortalidad", skip = 1)|>
janitor::clean_names()|>
dplyr::filter(ano>=2010, ano<=2020)|>
dplyr::mutate(edad= readr::parse_number(edad))|>
dplyr::filter(edad>=18, edad<65, region=="País")|>
dplyr::mutate(edad_anos_rec= dplyr::case_when(edad>=18 & edad<30~1,
edad>=30 & edad<45~2,
edad>=45 & edad<60~3,
edad>=60 & edad<65~4,T~NA_real_))|>
dplyr::mutate(edad_anos_rec= factor(edad_anos_rec, levels=1:4, labels= c("18-29","30-44","45-59","60-64")))|>
dplyr::group_by(ano, sexo, edad_anos_rec)|>
dplyr::summarise(
total_d_x = sum(d_x, na.rm = TRUE), # Suma de muertes en el grupo
total_l_x = sum(l_x, na.rm = TRUE), # Suma de la población al inicio del grupo (debiese ser con años-persona)
mean_m_x = mean(m_x),
mortality_rate_grouped = total_d_x / total_l_x
)Code
proy_ine_com<-
rio::import("https://www.ine.gob.cl/docs/default-source/proyecciones-de-poblacion/cuadros-estadisticos/base-2017/ine_estimaciones-y-proyecciones-2002-2035_base-2017_comunas0381d25bc2224f51b9770a705a434b74.csv?sfvrsn=b6e930a7_3&download=true")|>
tidyr::pivot_longer(cols = dplyr::starts_with("Poblacion"),
names_to = "anio",
values_to = "poblacion")|>
dplyr::mutate(anio= gsub("Poblacion ","",anio), anio=as.numeric(anio))|>
dplyr::filter(anio>=2010 & anio<=2020)|>
dplyr::mutate(edad_anos_rec= dplyr::case_when(Edad>=15 & Edad<30~1,
Edad>=30 & Edad<45~2,
Edad>=45 & Edad<60~3,
Edad>=60 & Edad<65~4,T~NA_real_))|>
dplyr:: mutate(edad_cat = dplyr::case_when(
Edad >= 15 & Edad < 20 ~ "15-19",
Edad >= 20 & Edad < 25 ~ "20-24",
Edad >= 25 & Edad < 30 ~ "25-29",
Edad >= 30 & Edad < 35 ~ "30-34",
Edad >= 35 & Edad < 40 ~ "35-39",
Edad >= 40 & Edad < 45 ~ "40-44",
Edad >= 45 & Edad < 50 ~ "45-49",
Edad >= 50 & Edad < 55 ~ "50-54",
Edad >= 55 & Edad < 60 ~ "55-59",
Edad >= 60 & Edad < 65 ~ "60-64",
Edad >= 65 & Edad < 70 ~ "65-69",
Edad >= 70 & Edad < 75 ~ "70-74",
Edad >= 75 & Edad < 80 ~ "75-79",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))
proy_ine_reg_group<-
proy_ine_com|>
#2025 le agrego eso para que sepamos que es igual que antes ser2024
dplyr::filter(Edad>=15, Edad<65)|>
#fomateamos para calzarla con la anterior
dplyr::mutate(reg_res= sprintf("%02d", Region))|>
dplyr::group_by(reg_res, `Sexo (1=Hombre 2=Mujer)`,edad_anos_rec, anio)|>
dplyr::mutate(edad_anos_rec= factor(edad_anos_rec, levels=1:4, labels= c("15-29","30-44","45-59","60-65")))|>
dplyr::summarise(poblacion= sum(poblacion, na.rm=T))Using relsurv package to combine UN mortality databases of females and males.
Code
rt<-
relsurv::transrate.hmd(paste0(base_path, "mltper_1x1.txt"),
paste0(base_path, "fltper_1x1.txt"))
# Supongamos que `rt` es tu objeto ratetable
# Convertir las dimensiones en vectores
ages <- attr(rt, "dimnames")$age # Edad
years <- attr(rt, "dimnames")$year # Año
sexes <- attr(rt, "dimnames")$sex # Sexo
# Extraer los valores del ratetable
values <- as.vector(rt) # Convierte el array en un vector plano
# Crear un dataframe con todas las combinaciones de las dimensiones
popmort1x1 <- expand.grid(age = ages,year = years,sex = sexes)
# Añadir los valores al dataframe
popmort1x1 <- popmort1x1 %>%
mutate(rate = values*365.241) #introduje esto el 10-01-2025Code
mx_1x1<-
rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1),
cbind.data.frame(sex="female", fltper_1x1))[,c("Year", "sex", "Age", "mx", "qx")]
mx_1x1$Age<- as.numeric(mx_1x1$Age)Warning: NAs introducidos por coerción
Code
years_followup<-
range(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm)[1]:range(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm)[2]
mx_1x1_filt<-mx_1x1[as.numeric(as.character(mx_1x1$Year)) %in% years_followup,]
mx_1x1_filt2<-mx_1x1_filt[as.numeric(as.character(mx_1x1_filt$Age)) %in% min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]
mx_1x1_filt2$lambda_p_days <- -log( 1 - mx_1x1_filt2$qx ) / 365.241
warning(paste0("El mx es rate de HMS pero multiplicado por 365.41"))Warning: El mx es rate de HMS pero multiplicado por 365.41
Code
mx_1x1_filt2$lambda_p_yrs <- -log( 1 - mx_1x1_filt2$qx )
mx_1x1_comp<-
rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1),
cbind.data.frame(sex="female", fltper_1x1))
mx_1x1_comp$Age<- as.numeric(mx_1x1_comp$Age)Warning: NAs introducidos por coerción
Code
mx_1x1_comp_filt<-mx_1x1_comp[as.numeric(as.character(mx_1x1_comp$Year)) %in% years_followup,]
mx_1x1_comp_filt2<-mx_1x1_comp_filt[as.numeric(as.character(mx_1x1_comp_filt$Age)) %in% min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]Population pyramid
Code
#18-29, 30-44, 45-59, 60-64
senda_2010_2020_2015<-
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>
mutate(disch_age_rec_cat = dplyr::case_when(
disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
disch_age_rec >= 60 & disch_age_rec < 76 ~ "60-75", #2025-06-13: MODIFIED TO AMPLIFY AGES,AND DID IT WITH AGE AT DISCAHRGE
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))|>
group_by(sex_rec, disch_age_rec_cat)|>
summarise(n=n())|>
ungroup() |>
filter(!is.na(disch_age_rec_cat))Code
mort_2015<-
mortality|>
filter(ano_def==2015)|>
mutate(adm_age_cat = dplyr::case_when(
edad_cant >= 18 & edad_cant < 30 ~ "18-29",
edad_cant >= 30 & edad_cant < 45 ~ "30-44",
edad_cant >= 45 & edad_cant < 60 ~ "45-59",
edad_cant >= 60 & edad_cant < 76 ~ "60-75", #2025-06-13: MODIFIED TO AMPLIFY AGES
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))|>
group_by(sexo, adm_age_cat)|>
summarise(n=n())|>
ungroup()|>
mutate(sex_rec=if_else(sexo==1,"male","female"))Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Create a basic bar chart for one gender
basic_plot <- ggplot(
senda_2010_2020_2015,
aes(
x = disch_age_rec_cat,
fill = sex_rec,
y = ifelse(
test = sex_rec == "male",
yes = -n,
no = n
)
)
) +
geom_bar(stat = "identity")
population_pyramid <- basic_plot +
scale_y_continuous(
labels = abs,
limits = max(subset(senda_2010_2020_2015, !is.na(sex_rec)& !is.na(disch_age_rec_cat))$n) * c(-1,1)
) +
coord_flip() +
theme_minimal() +
# Change scale_color_manual to scale_fill_manual
scale_fill_manual(
name = "Sex", # Change "Group" to "Sex" to match your labs()
values = c(
"female" = "#A6CEE3",
"male" = "#1F78B4"
),
na.translate = FALSE # Prevents NA from showing in legend if applicable
) +
labs(
x = "Age",
y = "Population",
fill = "Sex", # This matches the aesthetic and your new scale_fill_manual
title = "SENDA population"
)
tablas_de_mortalidad_de_chile_1992_2050_agrupada <-
readxl::read_excel(paste0(base_path, "tablas-de-mortalidad-de-chile-1992-2050.xlsx"),
sheet = "BD Tablas de Mortalidad", skip = 1)|>
janitor::clean_names()|>
dplyr::filter(ano>=2010, ano<=2020)|>
dplyr::mutate(edad= readr::parse_number(edad))|>
dplyr::filter(edad>=18, edad<65, region=="País")|>
dplyr::mutate(edad_anos_rec= dplyr::case_when(edad>=18 & edad<30~1,
edad>=30 & edad<45~2,
edad>=45 & edad<60~3,
edad>=60 & edad<76~4,T~NA_real_))|>
dplyr::mutate(adm_age_cat= factor(edad_anos_rec, levels=1:4, labels= c("18-29","30-44","45-59","60-75")))|>
mutate(sex_rec=if_else(sexo=="Hombre","male","female"))|>
dplyr::group_by(sex_rec, adm_age_cat)|>
dplyr::summarise(
total_d_x = sum(d_x, na.rm = TRUE), # Suma de muertes en el grupo
total_l_x = sum(l_x, na.rm = TRUE), # Suma de la población al inicio del grupo
mean_m_x = mean(m_x),
mortality_rate_grouped = total_d_x / total_l_x
)Code
basic_plot2 <- ggplot(
tablas_de_mortalidad_de_chile_1992_2050_agrupada,
aes(
x = adm_age_cat,
fill = sex_rec,
y = ifelse(
test = sex_rec == "male",
yes = -total_l_x,
no = total_l_x
)
)
) +
geom_bar(stat = "identity")
population_pyramid2 <- basic_plot2 +
scale_y_continuous(
labels = abs,
limits = max(subset(tablas_de_mortalidad_de_chile_1992_2050_agrupada, !is.na(sex_rec))$total_l_x) * c(-1,1)
) +
coord_flip() +
theme_minimal() +
# Change scale_color_manual to scale_fill_manual
scale_fill_manual(
name = "Sex", # Change "Group" to "Sex" to match your labs()
values = c(
"female" = "#A6CEE3",
"male" = "#1F78B4"
),
na.translate = FALSE # Prevents NA from showing in legend if applicable
) +
labs(
x = "Age",
y = "Population",
fill = "Sex", # This matches the aesthetic and your new scale_fill_manual
title = "Chilean population"
)
# Create a basic bar chart for one gender
basic_plot <- ggplot(
senda_2010_2020_2015,
aes(
x = disch_age_rec_cat,
fill = sex_rec,
y = ifelse(
test = sex_rec == "male",
yes = -n,
no = n
)
)
) +
geom_bar(stat = "identity")
plot_grid(population_pyramid+ theme(legend.position="none")+scale_y_continuous(
labels = function(x) {
scales::number_format(scale = 1e-6, suffix = "M")(abs(x))
}), population_pyramid2+ labs(x=NULL)+ theme(axis.text.y = element_blank())+ scale_y_continuous(
labels = function(x) {
scales::number_format(scale = 1e-6, suffix = "M")(abs(x))
}), ncol = 2)Code
ggsave(paste0(getwd(),"/_figs/pyramid.png"), dpi = 600, width = 9)Focused on treatment database
Given that the variable would be useful, we imported age of onset of substance use and combined different databases and get the mean to replace inconsistencies across records for each patient.
Code
CONS_C2_2324 <- c2_2324 %>% rename(edad_inicio = edad_inicio_sustancia_inicial, HASH_KEY=hashkey)
CONS_C2 <- CONS_C2 %>% rename(edad_inicio = edad_inicio_sustancia_inicial)
CONS_C4 <- CONS_C4 %>% rename(edad_inicio = edaddeiniciosustanciainicia)
CONS_C5 <- CONS_C5 %>% rename(edad_inicio = edad_inicio_sustancia_inicial)
CONS_C6 <- CONS_C6 %>% rename(edad_inicio = edaddeiniciosustanciainicia)
SISTRAT23_c1_2010_2022_df_prev1f <- SISTRAT23_c1_2010_2022_df_prev1f %>%
rename(edad_inicio = edad_inicio_consumo, HASH_KEY=hash_key)
SISTRAT23_c1_2023_2024_df2 <- SISTRAT23_c1_2023_2024_df2 %>%
rename(edad_inicio = edad_inicio_consumo, HASH_KEY=hash_key)
bases <- list(
CONS_C2 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
CONS_C2_2324 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
CONS_C4 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
CONS_C5 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
CONS_C6 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
SISTRAT23_c1_2010_2022_df_prev1f %>%
dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
SISTRAT23_c1_2023_2024_df2 %>%
dplyr::select(HASH_KEY, edad_inicio) %>%
dplyr::mutate(edad_inicio = as.numeric(edad_inicio))
)Warning: There was 1 warning in dplyr::mutate(). ℹ In argument: edad_inicio = as.numeric(edad_inicio). Caused by warning: ! NAs introducidos por coerción There was 1 warning in dplyr::mutate(). ℹ In argument: edad_inicio = as.numeric(edad_inicio). Caused by warning: ! NAs introducidos por coerción
Code
# Unir todas las bases
edad_unificada <- bind_rows(bases)
# Calcular el promedio de edad por HASH_KEY
promedios_edades <- edad_unificada %>%
group_by(HASH_KEY) %>%
summarise(promedio_edad = mean(edad_inicio, na.rm = TRUE))Code
pip install -U hf-transfer transformers kernels torchinvalid syntax (<string>, line 1)
We enhanced the dataset by categorizing admission age into meaningful groups and converting birth, admission, discharge, and death dates into continuous fractional year formats. Additionally, we created a macrozone variable based on regional groupings to facilitate the analysis of mortality distribution differences across geographic areas. The primary substance of concern was recoded into licit (alcohol) versus illicit (all other substances). Treatment compliance status was simplified into completed versus not completed categories, and the treatment plan type was classified as residential or ambulatory. Finally, we incorporated average age at substance use onset, merging it from an external dataset and using it to impute missing values in the onset age variable.
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv|>
mutate(adm_age_cat = dplyr::case_when(
adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))|>
filter(!is.na(adm_age_rec2))|>
mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec),
yr_fr_adm_date= year_fraction(adm_date_rec2),
yr_fr_disch_date= year_fraction(disch_date_rec6),
yr_fr_death_date_rec= year_fraction(death_date_rec))|>
mutate(macrozone = case_when(
region_del_centro %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
region_del_centro %in% c("de coquimbo", "de valparaiso")~ "2.Center",
region_del_centro %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
region_del_centro %in% c("de la araucania ", "de los rios", "de los lagos") ~ "4.South",
region_del_centro %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
TRUE ~ "Metropolitan" # En caso de que algún código no esté especificado
))|>
mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|>
mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|>
mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
tidytable::select(-any_of(c("promedio_edad")))|>
left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|>
mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))We replicated the actions for the database for sensitivity analysis.
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv|>
mutate(adm_age_cat = dplyr::case_when(
adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))|>
filter(!is.na(adm_age_rec2))|>
mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec),
yr_fr_adm_date= year_fraction(adm_date_rec2),
yr_fr_disch_date= year_fraction(disch_date_rec6),
yr_fr_death_date_rec= year_fraction(death_date_rec))|>
mutate(macrozone = case_when(
region_del_centro %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
region_del_centro %in% c("de coquimbo", "de valparaiso")~ "2.Center",
region_del_centro %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
region_del_centro %in% c("de la araucania ", "de los rios", "de los lagos") ~ "4.South",
region_del_centro %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
TRUE ~ "Metropolitan" # En caso de que algún código no esté especificado
))|>
mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|>
mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|>
mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
tidytable::select(-any_of(c("promedio_edad")))|>
left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|>
mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))2|
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv<-
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv|>
mutate(adm_age_cat = dplyr::case_when(
adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))|>
filter(!is.na(adm_age_rec2))|>
mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec),
yr_fr_adm_date= year_fraction(adm_date_rec2),
yr_fr_disch_date= year_fraction(disch_date_rec6),
yr_fr_death_date_rec= year_fraction(death_date_rec))|>
mutate(macrozone = case_when(
region_del_centro %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
region_del_centro %in% c("de coquimbo", "de valparaiso")~ "2.Center",
region_del_centro %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
region_del_centro %in% c("de la araucania ", "de los rios", "de los lagos") ~ "4.South",
region_del_centro %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
TRUE ~ "Metropolitan" # En caso de que algún código no esté especificado
))|>
mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|>
mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|>
mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
tidytable::select(-any_of(c("promedio_edad")))|>
left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|>
mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))Code
# List of categorical variables
categorical_vars <- c("prim_sub_licit", "adm_age_cat", "res_plan", "sex_rec", "sub_dep_icd10_status",
"macrozone", "tr_compliance_status")
# List of numerical variables
numerical_vars <- c("adm_age_rec2",
"dit_rec6",
"death_age_rec",
"yr_fr_birth_date_rec",
"yr_fr_adm_date",
"yr_fr_disch_date")If we want to check observations by groups
Code
xtabs(~tr_compliance_status+ prim_sub_licit+ res_plan+ sex_rec+ adm_age_cat, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)
# , , res_plan = 1, sex_rec = female, adm_age_cat = 60-65
# Sólo 2 en not complited e ilicit
xtabs(~status+ tr_compliance_status+ prim_sub_licit+ res_plan+ sex_rec+ adm_age_cat, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)
#, , prim_sub_licit = illicit, res_plan = 1, sex_rec = female, adm_age_cat = 60-65
#, , prim_sub_licit = illicit, res_plan = 1, sex_rec = male, adm_age_cat = 60-65
#no hay mujeres ni hombres que mueran por ilícitas de 60-65
#tiene pero pocos mueren, sin importar si completan o no, en residenciales, sustancias lícitas (alcohol), 18-29
#prim_sub_licit = licit, res_plan = 1, sex_rec = male, adm_age_cat = 18-29
#prim_sub_licit = licit, res_plan = 1, sex_rec = female, adm_age_cat = 18-29
xtabs(~sex_rec+ adm_age_cat+ yr_adm, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)Survival format
Check followup period
Code
with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv, summary(as.numeric(death_date_rec - disch_date_rec6)))
with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv, summary(as.numeric(death_date_rec - disch_date_rec6)))
with(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv, summary(as.numeric(death_date_rec - disch_date_rec6))) Min. 1st Qu. Median Mean 3rd Qu. Max.
-4016 855 1694 1730 2582 4007
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4016 626 1401 1489 2284 4000
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4016 822 1660 1708 2558 4007
Check birthdate
Code
round(min(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_fr_birth_date_rec),0)
round(max(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_fr_birth_date_rec),0)[1] 1946
[1] 2003
Identify negative followup period
Code
fot_years <- as.numeric((SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$death_date_rec - SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$disch_date_rec6) / 365.25)
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv,
difftime(death_date_rec, disch_date_rec6, unit="days"))/ 365.25)
)
table(fot_years<0)
neg_tr_d<- table(as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv,
difftime(death_date_rec, disch_date_rec6, unit="days"))/ 365.25)
<0)
cat("Negative days in treatment\n")
paste0(round((as.numeric(table(fot_years<0)[2])/222945)*100,1),"%")
paste0(round((neg_tr_d[[2]]/222945)*100,1),"%")
cat("Treatments over 3 years\n")
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv,
difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)
)
table(as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv,
difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)>3)
cat("Treatments over 3 years\n")
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv,
difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)
) vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 77385 4.08 2.96 3.84 3.97 3.33 -11 10.95 21.95 0.25 -0.73 0.01
FALSE TRUE
70290 4180
Negative days in treatment
[1] "1.9%"
[1] "2.8%"
Treatments over 3 years
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 74470 0.63 0.55 0.48 0.55 0.38 -0.94 7.51 8.45 2.33 9.85 0
FALSE TRUE
74037 433
Treatments over 3 years
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 74470 0.63 0.55 0.48 0.55 0.38 -0.94 7.51 8.45 2.33 9.85 0
We discard missing values in sex, discharge and death dates and negative follow-up periods.
Code
disch_after_cens_death<-
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>nrow()-
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>
filter(!is.na(disch_date_rec6),
!is.na(death_date_rec),
disch_date_rec6 < death_date_rec, # evita seguimiento negativo
!is.na(sex_rec)) |> nrow()
disch_after_cens_death
paste0(round((disch_after_cens_death/222945)*100,1),"%")[1] 4256
[1] "1.9%"
We coded the variable drug dependence (ICD-10) and translated, as well as sex, and added the variable age at discharge coded by groups (18-29, 30-44, 45-59, 60+).
Code
clean_df <- SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>
(\(df) {
cat("Before discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
filter(!is.na(disch_date_rec6),
!is.na(death_date_rec),
disch_date_rec6 < death_date_rec, # evita seguimiento negativo
adm_date_rec2 < disch_date_rec6, # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a on días tto neg
!is.na(sex_rec)) |> # grupos de agregación sin NA
(\(df) {
cat("After discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
mutate(
# sub_dep_icd10_status: reference = "Hazardous consumption"
sub_dep_icd10_status = factor(
sub_dep_icd10_status,
levels = c("Hazardous consumption", "Drug dependence")
),
# sex_rec: reference = "Male" (rename from lower-case if needed)
sex_rec = case_when( # optional renaming step
sex_rec == "male" ~ "Male",
sex_rec == "female" ~ "Female",
TRUE ~ sex_rec) %>%
factor(levels = c("Male", "Female"))
)|>
mutate(disch_age_cat = dplyr::case_when(
disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))
cat("Number of rows after cleaning the database:\n")
nrow(clean_df)After discarding missing or discharge dates
Before discarding missing or discharge dates
[1] 74470
[1] 70064
Number of rows after cleaning the database:
[1] 70064
We formatted the database in survival setting (clean_df_corr_surv).
Code
clean_df_corr <- clean_df %>%
mutate(
year_death = year(death_date_rec),
# age_death = as.numeric(difftime(death_date_rec, birth_date_rec,
# units = "days")) / 365.25#/ 365.2425#365.25
#age_death= interval(birth_date_rec, death_date_rec)|>as.numeric('years')
age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
)
# 2. Construct 'per' (2010-2021) similar to lexpand()
# (intervals closed at left, open at right)
breaks_vec <- seq(as.Date("2010-01-01"), as.Date("2022-01-01"), by = "year")
clean_df_corr <- clean_df_corr %>%
mutate(
per = cut(
death_date_rec,
breaks = breaks_vec,
right = FALSE, # [2010, 2011)
labels = 2010:2021
) %>% as.integer()
)
# 3. Label exclusion criteria
clean_df_corr <- clean_df_corr %>%
mutate(
excl_reason = case_when(
status != 1 ~ "alive / censored",
!is.na(per) & age_death < 76 ~ "included",
is.na(per) & age_death >= 76 ~ "age≥76 & year≥2022",
is.na(per) ~ "year≥2022",
age_death >= 76 ~ "age≥76",
TRUE ~ "otro"
)
)
# 4. Subgroups
muertes_per2021 <- clean_df_corr %>% filter(status == 1, per == 2021)
muertes_fuera_per <- clean_df_corr %>% filter(status == 1, is.na(per)) # ≥ 2022
muertes_age65plus <- clean_df_corr %>% filter(status == 1, age_death >= 76)
excluidos <- clean_df_corr %>%
filter(status == 1, excl_reason != "incluido") %>%
dplyr::select(rn, hash_key, death_date_rec, age_death, excl_reason)
cat("Maximum age at discharge:\n")
max(excluidos$age_death)
#[1] 74.42574
# 5. Summary of exclusions
# clean_df_corr %>%
# filter(status == 1) %>%
# count(excl_reason, name = "n") %>%
# arrange(desc(n))
start_fup <- as.Date("2010-01-01")
end_fup <- as.Date("2020-12-31")
pyrs_raw <- clean_df_corr %>%
## Exclusion criteria of SIR function ------------------
mutate(
age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
) %>%
filter(
## Only people in SIR function
(is.na(age_death) | age_death < 76), # top age 75
death_date_rec >= start_fup | is.na(death_date_rec),
disch_date_rec6 <= end_fup # entry ≤ 31-dic-2021
) %>%
## Exit date definiton (death/censorship) ----------------------
mutate(
exit_date = coalesce(death_date_rec, end_fup), # Alive → 31-dic-2021
exit_date = pmin(exit_date, end_fup), # Cut deaths after 2021
follow_up_days = as.numeric(exit_date - disch_date_rec6),
pyrs= time_length(interval(disch_date_rec6, exit_date), unit="year")
) %>%
(\(df) {
cat("Deaths \n")
print(janitor::tabyl(df,status))
cat("Number of rows \n")
print(nrow(df))
df ->>clean_df_corr_surv
})() |>
summarise(total_pyrs = sum(pyrs, na.rm = TRUE)) %>%
pull()
cat("Number of total person-years:\n")
pyrs_raw # debería ~ 353826
cat("Difference in person-years, manual vs. SIR:\n")
pyrs_raw-353826
paste0(round(((pyrs_raw-353826)/353826)*100,3),"% of the PYs")
cat("Person-years (for article)\n")
psych::describe(clean_df_corr_surv$pyrs)Maximum age at discharge:
[1] 74.42623
Deaths
status n percent
0 67068 0.9572391
1 2996 0.0427609
Number of rows
[1] 70064
Number of total person-years:
[1] 353843
Difference in person-years, manual vs. SIR:
[1] 16.99475
[1] "0.005% of the PYs"
Person-years (for article)
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 70064 5.05 2.8 4.9 4.99 3.29 0 10.97 10.97 0.16 -0.98 0.01
Code
cat("Maximum age at death\n")
clean_df_corr_surv|> mutate(edad_salida = as.numeric((pmin(death_date_rec, disch_date_rec6 + follow_up_days) - birth_date_rec) / 365.2425)) |>
summarise(max_age = max(edad_salida, na.rm = TRUE))
invisible("Trying to format it similar to lexpand function")
biostat3::survRate(Surv(pyrs, status==1)~1, data=clean_df_corr_surv)|>
dplyr::mutate(across(c("rate", "lower", "upper"),~sprintf("%1.1f",.*1000)))
sr_poredadsexanio <- biostat3::survRate(Surv(pyrs, status==1) ~ strata(sex_rec)+
strata(agegroup)+ strata(year), data= clean_df_corr_surv|>
mutate(agegroup = cut(
disch_age_rec, # la variable de edad
breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
right = FALSE, # intervalo izquierdo cerrado [15–30)
labels = c("18-29", "30-44", "45-59", "60+"),
include.lowest = TRUE # 15 entra en el primer tramo
),
year= lubridate::year(as.Date(disch_date_num_rec6))
)|>
filter(disch_age_rec>17, disch_age_rec<64)
)Maximum age at death
max_age
1 75.62921
tstop event rate lower upper
1 353843 2996 8.5 8.2 8.8
We replicate the formatting of variables (translation and coding of discharge date) for patients in the last treatment.
Code
#Table SXX. All-cause mortality rate and standardized mortality rate for
#patients who accessed SUD treatment by sex and age group, last treatment available
cat(paste0("QWhy the additional ", 3029-2996), " deaths in the last treatment? \n")
clean_df_b <- SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv |>
(\(df) {
cat("Before discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
filter(!is.na(disch_date_rec6),
!is.na(death_date_rec),
disch_date_rec6 < death_date_rec, # evita seguimiento negativo
adm_date_rec2 < disch_date_rec6, # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a con días tto neg
!is.na(sex_rec)) |> # grupos de agregación sin NA
(\(df) {
cat("After discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
mutate(
# sub_dep_icd10_status: reference = "Hazardous consumption"
sub_dep_icd10_status = factor(
sub_dep_icd10_status,
levels = c("Hazardous consumption", "Drug dependence")
),
# sex_rec: reference = "Male" (rename from lower-case if needed)
sex_rec = case_when( # optional renaming step
sex_rec == "male" ~ "Male",
sex_rec == "female" ~ "Female",
TRUE ~ sex_rec) %>%
factor(levels = c("Male", "Female"))
)|>
mutate(disch_age_cat = dplyr::case_when(
disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))
nrow(clean_df_b)QWhy the additional 33 deaths in the last treatment?
After discarding missing or discharge dates
Before discarding missing or discharge dates
[1] 77385
[1] 70913
[1] 70913
Code
warning(paste0("2025-08-03: ACC warned me that I may leaving deaths out due to being before tratment finishes: ",(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_rec6 > death_date_rec, death_date_rec!="2020-12-31") ) |> nrow(), ", so i need to improve the change in 2025-06-19 to account for these users"))Warning: 2025-08-03: ACC warned me that I may leaving deaths out due to being before tratment finishes: 369, so i need to improve the change in 2025-06-19 to account for these users
Code
invisible("Completed treatments")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec) |> filter(tr_compliance_status=="Completed") |> nrow()
invisible("17")
invisible("Differences over than one year")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec) |> filter(diff>1)
invisible("80")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> View()
invisible("58 have admission date greater than death date")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec, diff2=yr_fr_adm_date-yr_fr_death_date_rec) |> filter(diff2>1)
cat("2025-06-19: Add median days in treatment if missing discharge date\n")
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr <-
ifelse(is.na(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_rec6),
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2+as.numeric(quantile(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$dit_rec6, .5)), SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_rec6)
cat("2025-08-03: Add median days in treatment if missing discharge date, or if its over death date\n")
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr2 <-
ifelse(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr>SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$death_date_rec & !is.na(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$death_date_rec)& as.Date(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr)<"2021-01-01",
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2+as.numeric(quantile(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$dit_rec6, .5)), SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr)
clean_df_c <- SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv|>
mutate(disch_date_corr= as.Date(disch_date_corr2))|> #changed at 25-08-03
(\(df) {
cat("Before discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
filter(!is.na(disch_date_corr),
!is.na(death_date_rec),
disch_date_corr <= death_date_rec, # evita seguimiento negativo #2025-08-03: changed censorship to allow more deceased to enter
adm_date_rec2 < disch_date_corr, # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a on días tto neg
!is.na(sex_rec)) |> # grupos de agregación sin NA
(\(df) {
cat("After discarding missing or discharge dates \n")
print(nrow(df))
df
})() |>
mutate(
# sub_dep_icd10_status: reference = "Hazardous consumption"
sub_dep_icd10_status = factor(
sub_dep_icd10_status,
levels = c("Hazardous consumption", "Drug dependence")
),
# sex_rec: reference = "Male" (rename from lower-case if needed)
sex_rec = case_when( # optional renaming step
sex_rec == "male" ~ "Male",
sex_rec == "female" ~ "Female",
TRUE ~ sex_rec) %>%
factor(levels = c("Male", "Female"))
)|>
mutate(disch_age_cat = dplyr::case_when(
disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
))
cat("Number of rows after cleaning the database:\n")
nrow(clean_df_c)
cat("2025-08-03: To avoid dropped XX rows where entry == exit\n")
clean_df_c$disch_date_corr <- as.Date(
ifelse(clean_df_c$disch_date_corr == clean_df_c$death_date_rec,
as.integer(clean_df_c$disch_date_corr) - 1L,
as.integer(clean_df_c$disch_date_corr)),
origin = "1970-01-01"
)
clean_df_c_corr <- clean_df_c%>%
mutate(
year_death = year(death_date_rec),
# age_death = as.numeric(difftime(death_date_rec, birth_date_rec,
# units = "days")) / 365.25#/ 365.2425#365.25
#age_death= interval(birth_date_rec, death_date_rec)|>as.numeric('years')
age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
)
# 2. Construct 'per' (2010-2021) similar to lexpand()
# (intervals closed at left, open at right)
breaks_vec <- seq(as.Date("2010-01-01"), as.Date("2021-01-01"), by = "year")
clean_df_c_corr <- clean_df_c_corr %>%
mutate(
per = cut(
death_date_rec,
breaks = breaks_vec,
right = FALSE, # [2010, 2011)
labels = 2010:2020 #modify from 2021 to 2020
) %>% as.integer()
)
# 3. Label exclusion criteria
clean_df_c_corr <- clean_df_c_corr%>%
mutate(
excl_reason = case_when(
status != 1 ~ "alive / censored",
!is.na(per) & age_death < 76 ~ "included",
is.na(per) & age_death >= 76 ~ "age≥76 & year≥2022",
is.na(per) ~ "year≥2022",
age_death >= 76 ~ "age≥76",
TRUE ~ "otro"
)
)
# 4. Subgroups
muertes_per2021_c <- clean_df_c_corr %>% filter(status == 1, per == 2021)
muertes_fuera_per_c <- clean_df_c_corr %>% filter(status == 1, is.na(per)) # ≥ 2022
muertes_age65plus_c<- clean_df_c_corr %>% filter(status == 1, age_death >= 76)
excluidos <- clean_df_c_corr %>%
filter(status == 1, excl_reason != "included") %>%
dplyr::select(rn, hash_key, death_date_rec, age_death, excl_reason)
cat("Maximum age at discharge:\n")
max(excluidos$age_death, na.rm=T)Warning in max(excluidos$age_death, na.rm = T): ningun argumento finito para max; retornando -Inf
Code
#[1] 74.42623
# 5. Summary of exclusions
# clean_df_corr %>%
# filter(status == 1) %>%
# count(excl_reason, name = "n") %>%
# arrange(desc(n))
start_fup_c <- as.Date("2010-01-01")
end_fup_c <- as.Date("2020-12-31")
pyrs_raw_c <- clean_df_c_corr %>%
## Exclusion criteria of SIR function ------------------
mutate(
age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
) %>%
filter(
## Only people in SIR function
(is.na(age_death) | age_death < 76), # top age 75
death_date_rec >= start_fup_c | is.na(death_date_rec),
disch_date_rec6 <= end_fup_c # entry ≤ 31-dic-2021
) %>%
## Exit date definiton (death/censorship) ----------------------
mutate(
exit_date = coalesce(death_date_rec, end_fup_c), # Alive → 31-dic-2021
exit_date = pmin(exit_date, end_fup_c), # Cut deaths after 2021
follow_up_days = as.numeric(exit_date - disch_date_corr),
pyrs= time_length(interval(disch_date_corr, exit_date), unit="year")
) %>%
(\(df) {
cat("Deaths \n")
print(janitor::tabyl(df,status))
cat("Number of rows \n")
print(nrow(df))
df ->>clean_df_corr_surv_c
})() |>
summarise(total_pyrs = sum(pyrs, na.rm = TRUE)) %>%
pull()
cat("Number of total person-years:\n")
pyrs_raw_c # debería ~ 353826
cat("Difference in person-years, manual vs. SIR:\n")
pyrs_raw_c-4249152
paste0(round(((pyrs_raw_c-4249152)/4249152)*100,3),"% of the PYs")
cat("2025-08-03: Differences with previous database with unfinished treatments but w/o patients deceased before finishing them\n")
pyrs_raw_c-416166
cat("Added patients: 2025-08-03: ");table(clean_df_corr_surv_c$status)[[2]]-3643
cat("Mean pyrs by user: \n")
round((table(clean_df_corr_surv_c$status)[[2]]-3643)/(pyrs_raw_c-416166),2)2025-06-19: Add median days in treatment if missing discharge date
2025-08-03: Add median days in treatment if missing discharge date, or if its over death date
After discarding missing or discharge dates
Before discarding missing or discharge dates
[1] 88774
[1] 83782
Number of rows after cleaning the database:
[1] 83782
2025-08-03: To avoid dropped XX rows where entry == exit
Maximum age at discharge:
[1] -Inf
Deaths
status n percent
0 79965 0.95444129
1 3817 0.04555871
Number of rows
[1] 83782
Number of total person-years:
[1] 416305.6
Difference in person-years, manual vs. SIR:
[1] -3832846
[1] "-90.203% of the PYs"
2025-08-03: Differences with previous database with unfinished treatments but w/o patients deceased before finishing them
[1] 139.5834
Added patients: 2025-08-03: [1] 174
Mean pyrs by user:
[1] 1.25
Descriptives
Code
categorical_vars_corr<-
c(categorical_vars,"disch_age_cat")
# Create summaries for categorical variables
categorical_summaries_cln <- lapply(
c(categorical_vars_corr),# <- object you iterate over
summarize_categorical_tt, # <- function
.data = clean_df # <- extra argument “.data =”
)Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0. ℹ Please use all_of() or any_of() instead. # Was: data %>% select(var)
# Now: data %>% select(all_of(var))
See https://tidyselect.r-lib.org/reference/faq-external-vector.html.
Code
names(categorical_summaries_cln) <- categorical_vars_corr
# Create summaries for numerical variables
numerical_summaries_cln <- lapply(
c(numerical_vars, "disch_age_rec"),# <- object you iterate over
\(v) summarize_numerical_tt(clean_df, v)
)
names(numerical_summaries_cln) <- c(numerical_vars, "disch_age_rec")Code
table_one_clean <- CreateTableOne(
vars = c(categorical_vars, numerical_vars,"disch_age_cat", "disch_age_rec"),
data = clean_df,
factorVars = c(categorical_vars,"disch_age_cat"),
strata = "status",
addOverall = TRUE, # Incluir totales
test = FALSE, # Realizar pruebas de diferencias
smd = TRUE # Incluir SMDs
)
table_one_clean_print <- print(table_one_clean,
nonnormal = c(numerical_vars, "disch_age_rec"),
formatOptions = list(big.mark = ","),
quote = TRUE,
noSpaces = TRUE,
showAllLevels = TRUE, # Mostrar todos los niveles de variables categóricas
missing = FALSE, # Incluir valores perdidos
explain = FALSE, # Añadir explicación de las variables
digits = c(adm_age_rec2 = 1, dit_rec6 = 1, yr_fr_birth_date_rec = 1), smd = TRUE)
table_one_clean_print |>
knitr::kable("html", digits=1) |>
kableExtra::kable_classic() |>
kableExtra::scroll_box(height="400px")
write.table(table_one_clean_print, file = paste0(getwd(),"/_out/table_one_clean.txt"), sep = "\t", row.names = FALSE) "Stratified by status"
"" "level"
"n" ""
"prim_sub_licit" "illicit"
"" "licit"
"adm_age_cat" "18-29"
"" "30-44"
"" "45-59"
"" "60-64"
"res_plan" "0"
"" "1"
"sex_rec" "Male"
"" "Female"
"sub_dep_icd10_status" "Hazardous consumption"
"" "Drug dependence"
"macrozone" "1.North"
"" "2.Center"
"" "3.South-center"
"" "4.South"
"" "5.Austral"
"" "Metropolitan"
"tr_compliance_status" "Completed"
"" "Not completed"
"adm_age_rec2" ""
"dit_rec6" ""
"death_age_rec" ""
"yr_fr_birth_date_rec" ""
"yr_fr_adm_date" ""
"yr_fr_disch_date" ""
"disch_age_cat" "18-29"
"" "30-44"
"" "45-59"
"" "60+"
"disch_age_rec" ""
"Stratified by status"
"" "Overall"
"n" "70,064"
"prim_sub_licit" "46048 (65.7)"
"" "24016 (34.3)"
"adm_age_cat" "24341 (34.7)"
"" "31485 (44.9)"
"" "13098 (18.7)"
"" "1140 (1.6)"
"res_plan" "61539 (87.8)"
"" "8525 (12.2)"
"sex_rec" "53331 (76.1)"
"" "16733 (23.9)"
"sub_dep_icd10_status" "20248 (28.9)"
"" "49816 (71.1)"
"macrozone" "7988 (11.4)"
"" "7310 (10.4)"
"" "11943 (17.0)"
"" "3332 (4.8)"
"" "782 (1.1)"
"" "38709 (55.2)"
"tr_compliance_status" "19185 (27.4)"
"" "50879 (72.6)"
"adm_age_rec2" "34.12 [27.43, 42.86]"
"dit_rec6" "165.00 [92.00, 290.00]"
"death_age_rec" "39.00 [33.00, 48.00]"
"yr_fr_birth_date_rec" "1,980.99 [1,972.24, 1,987.71]"
"yr_fr_adm_date" "2,015.32 [2,013.08, 2,017.50]"
"yr_fr_disch_date" "2,015.93 [2,013.66, 2,018.08]"
"disch_age_cat" "22977 (32.8)"
"" "31881 (45.5)"
"" "13739 (19.6)"
"" "1467 (2.1)"
"disch_age_rec" "34.66 [27.94, 43.50]"
"Stratified by status"
"" "0"
"n" "67,068"
"prim_sub_licit" "44794 (66.8)"
"" "22274 (33.2)"
"adm_age_cat" "23896 (35.6)"
"" "30287 (45.2)"
"" "11895 (17.7)"
"" "990 (1.5)"
"res_plan" "59051 (88.0)"
"" "8017 (12.0)"
"sex_rec" "50935 (75.9)"
"" "16133 (24.1)"
"sub_dep_icd10_status" "19545 (29.1)"
"" "47523 (70.9)"
"macrozone" "7695 (11.5)"
"" "7085 (10.6)"
"" "11433 (17.0)"
"" "3167 (4.7)"
"" "730 (1.1)"
"" "36958 (55.1)"
"tr_compliance_status" "18514 (27.6)"
"" "48554 (72.4)"
"adm_age_rec2" "33.81 [27.27, 42.35]"
"dit_rec6" "166.00 [92.00, 290.00]"
"death_age_rec" "39.00 [33.00, 48.00]"
"yr_fr_birth_date_rec" "1,981.34 [1,972.76, 1,987.93]"
"yr_fr_adm_date" "2,015.39 [2,013.15, 2,017.55]"
"yr_fr_disch_date" "2,016.01 [2,013.73, 2,018.16]"
"disch_age_cat" "22566 (33.6)"
"" "30699 (45.8)"
"" "12526 (18.7)"
"" "1277 (1.9)"
"disch_age_rec" "34.35 [27.77, 43.01]"
"Stratified by status"
"" "1" "SMD"
"n" "2,996" ""
"prim_sub_licit" "1254 (41.9)" "0.517"
"" "1742 (58.1)" ""
"adm_age_cat" "445 (14.9)" "0.657"
"" "1198 (40.0)" ""
"" "1203 (40.2)" ""
"" "150 (5.0)" ""
"res_plan" "2488 (83.0)" "0.143"
"" "508 (17.0)" ""
"sex_rec" "2396 (80.0)" "0.097"
"" "600 (20.0)" ""
"sub_dep_icd10_status" "703 (23.5)" "0.129"
"" "2293 (76.5)" ""
"macrozone" "293 (9.8)" "0.139"
"" "225 (7.5)" ""
"" "510 (17.0)" ""
"" "165 (5.5)" ""
"" "52 (1.7)" ""
"" "1751 (58.4)" ""
"tr_compliance_status" "671 (22.4)" "0.120"
"" "2325 (77.6)" ""
"adm_age_rec2" "43.41 [34.32, 51.16]" "0.698"
"dit_rec6" "156.00 [91.00, 288.00]" "0.020"
"death_age_rec" "47.00 [38.00, 54.00]" "0.521"
"yr_fr_birth_date_rec" "1,970.72 [1,963.02, 1,979.52]" "0.826"
"yr_fr_adm_date" "2,013.82 [2,012.02, 2,015.66]" "0.507"
"yr_fr_disch_date" "2,014.42 [2,012.50, 2,016.34]" "0.507"
"disch_age_cat" "411 (13.7)" "0.653"
"" "1182 (39.5)" ""
"" "1213 (40.5)" ""
"" "190 (6.3)" ""
"disch_age_rec" "43.99 [34.72, 51.81]" "0.691"
| "level" | "Overall" | "0" | "1" | "SMD" | |
|---|---|---|---|---|---|
| "n" | 70,064 | 67,068 | 2,996 | ||
| "prim_sub_licit" | illicit | 46048 (65.7) | 44794 (66.8) | 1254 (41.9) | 0.517 |
| "" | licit | 24016 (34.3) | 22274 (33.2) | 1742 (58.1) | |
| "adm_age_cat" | 18-29 | 24341 (34.7) | 23896 (35.6) | 445 (14.9) | 0.657 |
| "" | 30-44 | 31485 (44.9) | 30287 (45.2) | 1198 (40.0) | |
| "" | 45-59 | 13098 (18.7) | 11895 (17.7) | 1203 (40.2) | |
| "" | 60-64 | 1140 (1.6) | 990 (1.5) | 150 (5.0) | |
| "res_plan" | 0 | 61539 (87.8) | 59051 (88.0) | 2488 (83.0) | 0.143 |
| "" | 1 | 8525 (12.2) | 8017 (12.0) | 508 (17.0) | |
| "sex_rec" | Male | 53331 (76.1) | 50935 (75.9) | 2396 (80.0) | 0.097 |
| "" | Female | 16733 (23.9) | 16133 (24.1) | 600 (20.0) | |
| "sub_dep_icd10_status" | Hazardous consumption | 20248 (28.9) | 19545 (29.1) | 703 (23.5) | 0.129 |
| "" | Drug dependence | 49816 (71.1) | 47523 (70.9) | 2293 (76.5) | |
| "macrozone" | 1.North | 7988 (11.4) | 7695 (11.5) | 293 (9.8) | 0.139 |
| "" | 2.Center | 7310 (10.4) | 7085 (10.6) | 225 (7.5) | |
| "" | 3.South-center | 11943 (17.0) | 11433 (17.0) | 510 (17.0) | |
| "" | 4.South | 3332 (4.8) | 3167 (4.7) | 165 (5.5) | |
| "" | 5.Austral | 782 (1.1) | 730 (1.1) | 52 (1.7) | |
| "" | Metropolitan | 38709 (55.2) | 36958 (55.1) | 1751 (58.4) | |
| "tr_compliance_status" | Completed | 19185 (27.4) | 18514 (27.6) | 671 (22.4) | 0.120 |
| "" | Not completed | 50879 (72.6) | 48554 (72.4) | 2325 (77.6) | |
| "adm_age_rec2" | 34.12 [27.43, 42.86] | 33.81 [27.27, 42.35] | 43.41 [34.32, 51.16] | 0.698 | |
| "dit_rec6" | 165.00 [92.00, 290.00] | 166.00 [92.00, 290.00] | 156.00 [91.00, 288.00] | 0.020 | |
| "death_age_rec" | 39.00 [33.00, 48.00] | 39.00 [33.00, 48.00] | 47.00 [38.00, 54.00] | 0.521 | |
| "yr_fr_birth_date_rec" | 1,980.99 [1,972.24, 1,987.71] | 1,981.34 [1,972.76, 1,987.93] | 1,970.72 [1,963.02, 1,979.52] | 0.826 | |
| "yr_fr_adm_date" | 2,015.32 [2,013.08, 2,017.50] | 2,015.39 [2,013.15, 2,017.55] | 2,013.82 [2,012.02, 2,015.66] | 0.507 | |
| "yr_fr_disch_date" | 2,015.93 [2,013.66, 2,018.08] | 2,016.01 [2,013.73, 2,018.16] | 2,014.42 [2,012.50, 2,016.34] | 0.507 | |
| "disch_age_cat" | 18-29 | 22977 (32.8) | 22566 (33.6) | 411 (13.7) | 0.653 |
| "" | 30-44 | 31881 (45.5) | 30699 (45.8) | 1182 (39.5) | |
| "" | 45-59 | 13739 (19.6) | 12526 (18.7) | 1213 (40.5) | |
| "" | 60+ | 1467 (2.1) | 1277 (1.9) | 190 (6.3) | |
| "disch_age_rec" | 34.66 [27.94, 43.50] | 34.35 [27.77, 43.01] | 43.99 [34.72, 51.81] | 0.691 |
Lexis format
We generated three derivative datasets to support the mortality analyses. First, c_SISTRAT_c1 expands the original cohort (clean_df) into a Lexis structure with lexpand(), splitting each subject’s follow-up by calendar year (2010–2020) and attained-age bands (18-29, 30-44, 45-59, 60-75 years). Each resulting row holds the person-years (pyrs) and deaths (from0to1) for a unique combination of age group, calendar year and sex—ready for fast calculation of standardized mortality ratios. c_SISTRAT_c1_b repeats the identical procedure on the database of the last followed treatment clean_df_b, providing an audit trail for sensitivity checks. Finally, c_SISTRAT_c1_lex is built with Epi::Lexis(), keeping the data at episode level: entry is the discharge date (calendar time and age), exit is death or censoring, and exit status codes the event. This individual-level object lets us run detailed time-to-event models and plot trajectories, while the aggregated tables handle all rate-based summaries. In 2025-06-19, we added clean_df_c to check the SMRs of patients without finished treatments, by imputing days in treatment for patients with incomplete treatments with the median of days in treatment (i.e., 151 days).
Code
c_SISTRAT_c1 <- lexpand( clean_df,
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_amb <- lexpand(subset(clean_df, res_plan==0),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_res <- lexpand(subset(clean_df, res_plan==1),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_licit <- lexpand(subset(clean_df, prim_sub_licit=="licit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_illicit <- lexpand(subset(clean_df, prim_sub_licit=="illicit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_comp <- lexpand(subset(clean_df, !grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_not_comp <- lexpand(subset(clean_df, grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b <- lexpand( clean_df_b,
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
breaks = list(per = seq(2010, 2021, by = 1), age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_amb <- lexpand(subset(clean_df_b, res_plan==0),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_res <- lexpand(subset(clean_df_b, res_plan==1),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_licit <- lexpand(subset(clean_df_b, prim_sub_licit=="licit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_illicit <- lexpand(subset(clean_df_b, prim_sub_licit=="illicit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_comp <- lexpand(subset(clean_df_b, !grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_not_comp <- lexpand(subset(clean_df_b, grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c <- popEpi::lexpand( clean_df_c,
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_amb <- lexpand(subset(clean_df_c, res_plan==0),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_res <- lexpand(subset(clean_df_c, res_plan==1),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_licit <- lexpand(subset(clean_df_c, prim_sub_licit=="licit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_illicit <- lexpand(subset(clean_df_c, prim_sub_licit=="illicit"),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_comp <- lexpand(subset(clean_df_c, !grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_not_comp <- lexpand(subset(clean_df_c, grepl("Not", tr_compliance_status)),
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_corr,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))Chilean population statistics & UN mortality-tables
We imported population projections for individuals aged 18 to 76 covering the years 2010 to 2020. After cleaning and reshaping the data, we grouped it by year, age, and sex. Next, we loaded mortality tables to weight by population—by year, sex, and age group to support our further analyses.
We converted the UN 5-year intervals abridged life-table (cons_rate_sex_5x1) into the reference rates used for the SIR. First, we retrieved each interval’s starting age (start_age) and width (n = 1 year for age 0, 4 years for 1–4, 5 years thereafter). Using \(q_x\) we derived the instantaneous hazard for the interval, λₓ = –log(1 – qₓ)/n, and reconstructed the number of deaths (lx × qₓ) and the corresponding person-years of exposure (deaths / λₓ). We then collapsed the life-table into the study’s four attained-age bands—18-29, 30-44, 45-59, 60-75 years—summing deaths and exposure within each (by calendar year and sex) and finally computed the average population hazard λ = deaths / PY for every sex-year-age-band cell. We did not use the \(m_x\) (crude central death rate) . The resulting table (popmort_lt_cl_banded) contains sex, year, agegroup, and haz (population-weighted hazards), and serves as the external mortality reference in the SIR calculation.
Code
proy_ine_com_18_75<-
rio::import("https://www.ine.gob.cl/docs/default-source/proyecciones-de-poblacion/cuadros-estadisticos/base-2017/ine_estimaciones-y-proyecciones-2002-2035_base-2017_comunas0381d25bc2224f51b9770a705a434b74.csv?sfvrsn=b6e930a7_3&download=true")|>
tidyr::pivot_longer(cols = dplyr::starts_with("Poblacion"),
names_to = "anio",
values_to = "poblacion")|>
dplyr::mutate(anio= gsub("Poblacion ","",anio), anio=as.numeric(anio))|>
dplyr::filter(anio>=2010 & anio<=2020, Edad>=18 & Edad<76)|>
dplyr::mutate(sex_rec= ifelse(`Sexo (1=Hombre 2=Mujer)`==2,"female","male"))|>
group_by(anio, Edad, sex_rec)|>
summarise(pop=sum(poblacion, na.rm=T))|>
ungroup()|>
rename("age"="Edad", "year"="anio")Code
mx_1x1_banded <- mx_1x1_filt2|>
mutate(
year = Year, # renombramos para que coincida con popmort
# agegroup =
# case_when( # crea bandas con fcase() de tidytable
# Age >= 18 & Age <= 29~ "18-29",
# Age >= 30 & Age <= 44~ "30-44",
# Age >= 45 & Age <= 59~ "45-59",
# Age >= 60 & Age <= 76~ "60+",
# T~ NA_character_
agegroup = case_when( # crea bandas con fcase() de tidytable
Age >= 18 & Age <= 29~ 18,
Age >= 30 & Age <= 44~ 30,
Age >= 45 & Age <= 59~ 45,
Age >= 60 & Age <= 76~ 60,
T~ NA_real_
)
)|>
filter(!is.na(agegroup))|> # descarta edades fuera de 18-65
left_join(proy_ine_com_18_75, by= c("Year"="year", "sex"="sex_rec", "Age"="age"))|>
summarise(
haz = weighted.mean(lambda_p_yrs, w = pop, na.rm = TRUE), # tasa media del grupo
mx = weighted.mean(mx, w = pop, na.rm = TRUE),
.by = c(year, sex, agegroup),
#.groups = "drop"
)|>
arrange(year, sex, agegroup)|>
ungroup()|>
mutate(sex= ifelse(sex=="female","Female","Male"))Stantardized mortality rates- Indirect
We then estimated the cohort’s excess mortality with sir(). The function compares the deaths and person-years in the aggregated Lexis file (c_SISTRAT_c1, columns from0to1 and pyrs) with the population hazards prepared above (mx_1x1_banded, column haz). By specifying adjust = c("agegroup", "year", "sex"), the standardized mortality ratio (SIR) is stratified on the same four attained-age bands, calendar year, and sex that define the reference table. The call returns:
- the overall SIR with profile-likelihood 95 % CI,
- the expected deaths under population rates,
- total person-years, and
- the excess absolute risk (EAR), i.e. additional deaths per 1 000 PY (deaths each 100 patients followed over a 10-year period).
These values quantify both the relative (SIR) and absolute (EAR) mortality burden of the treated SUD cohort versus the Chilean general population.
The single-row dataset (clean_df_corr_surv) parks every patient’s entire follow-up in the calendar year and age they had at admission. All their person-years therefore stay in strata with younger ages and earlier years, where population mortality rates are low, so the expected deaths are artificially small and the SMR inflates. lexpand() creates c_SISTRAT_c1, a Lexis table that splits each record every time the person moves to a new calendar year or ages into a new band (18-29, 30-44, 45-59, 60-75). Person-years are thus re-allocated to the true year-age cells, many of which carry higher background rates; expected deaths rise and the SIR drops. Bottom line: for any SMR/SIR, Poisson or GLM with offset log(PY), always use the time-split Lexis version so that exposure is counted in the correct year and attained-age stratum.
Code
sr_poredadsexanio <- biostat3::survRate(Surv(pyrs, status==1) ~ strata(sex_rec)+
strata(agegroup)+ strata(year), data=
clean_df_corr_surv|>
mutate(agegroup = cut(
disch_age_rec, # la variable de edad
breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
right = FALSE, # intervalo izquierdo cerrado [15–30)
labels = c("18-29", "30-44", "45-59", "60+"),
include.lowest = TRUE # 15 entra en el primer tramo
),
#year= lubridate::year(as.Date(exit_date))
year = lubridate::year(as.Date(disch_date_num_rec6)) # USE DISCHARGE YEAR
)|>
filter(disch_age_rec>17, disch_age_rec<76))
#this approach for calculating adjusted SIR is not advisable because what I explained in quarto.
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
df_glm <- c_SISTRAT_c1 %>%
# agegroup to labels
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
) %>%
# Join with reference life-tables
left_join(
mx_1x1_banded %>%
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
),
by = c("agegroup", "year", "sex")
) %>%
# Expected events
mutate(expected = pyrs * haz)
model_poisson <- glm(
from0to1 ~ 1,
family = poisson,
offset = log(expected),
data = df_glm
)
pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
df_residual <- df.residual(model_poisson)
dispersion_index <- pearson_chisq / df_residual
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
df_glm_b <- c_SISTRAT_c1_b %>%
# agegroup to labels
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
) %>%
# Join with reference life-tables
left_join(
mx_1x1_banded %>%
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
),
by = c("agegroup", "year", "sex")
) %>%
# Expected events
mutate(expected = pyrs * haz)
model_poisson_b <- glm(
from0to1 ~ 1,
family = poisson,
offset = log(expected),
data = df_glm_b
)
pearson_chisq_b <- sum(residuals(model_poisson_b, type = "pearson")^2)
df_residual_b <- df.residual(model_poisson_b)
dispersion_index_b <- pearson_chisq_b / df_residual_b
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
df_glm_c <- c_SISTRAT_c1_c %>%
# agegroup to labels
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
) %>%
# Join with reference life-tables
left_join(
mx_1x1_banded %>%
mutate(
agegroup = case_when(
agegroup == 18 ~ "18-29",
agegroup == 30 ~ "30-44",
agegroup == 45 ~ "45-59",
agegroup == 60 ~ "60+"
),
sex = as.character(sex)
),
by = c("agegroup", "year", "sex")
) %>%
# Expected events
mutate(expected = pyrs * haz)
model_poisson_c <- glm(
from0to1 ~ 1,
family = poisson,
offset = log(expected),
data = df_glm_c
)
pearson_chisq_c <- sum(residuals(model_poisson_c, type = "pearson")^2)
df_residual_c <- df.residual(model_poisson_c)
dispersion_index_c <- pearson_chisq_c / df_residual_cCode
#https://bendixcarstensen.com/Epi/flup.pdf
#https://onlinelibrary.wiley.com/doi/full/10.1002%2Fijc.34973
sir_tot<- popEpi::sir( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot
c_SISTRAT_c1_fot <- lexpand( clean_df,
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#fot=0:10,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76), fot = c(0, .0386,.2465, .5, 1, 3, 5, 7, 9, Inf)),
aggre = list(agegroup = age, year = per, sex= sex_rec, fot= fot) )
sir_tot_fot<- popEpi::sir( coh.data = c_SISTRAT_c1_fot, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
print="fot",
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_fot
sr_1_sex_fot <- popEpi::sir(c_SISTRAT_c1_fot, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("sex", "fot"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
sr_1_sex <- popEpi::sir(c_SISTRAT_c1, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("sex"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
sr_1_age <- popEpi::sir(c_SISTRAT_c1, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("agegroup"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_df<-
cbind.data.frame(
total= "Overall",
observed= round(sir_tot$observed,0),
pyrs= round(sir_tot$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot$observed, sir_tot$pyrs, phi=1))))),
expected= round(sir_tot$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot, phi=dispersion_index)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot$EAR)),
phi=dispersion_index)
# df_glm <- c_SISTRAT_c1 %>%
# dplyr::left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
# dplyr::mutate(expected = pyrs * haz)
# Run analysis
sr_1_sex_df <- sir_cmr_subgroup(
df = df_glm,
group_var = "sex"
)
sr_1_sex_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_sex$observed, sr_1_sex$pyrs, phi=1))))
sr_1_age_df <- sir_cmr_subgroup(
df = df_glm,
group_var = "agegroup"
)
sr_1_age_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_age$observed, sr_1_age$pyrs, phi=1))))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Now by strata")
sir_tot_amb<- popEpi::sir( coh.data = c_SISTRAT_c1_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_amb_df<-
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_amb$observed,0),
pyrs= round(sir_tot_amb$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_amb$observed, sir_tot_amb$pyrs, phi= 1))))),
expected= round(sir_tot_amb$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_amb, phi= extract_phi(c_SISTRAT_c1_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_amb$EAR)),
phi=extract_phi(c_SISTRAT_c1_amb))
sir_tot_res<- popEpi::sir( coh.data = c_SISTRAT_c1_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_res_df<-
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_res$observed,0),
pyrs= round(sir_tot_res$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_res$observed, sir_tot_res$pyrs, phi= 1))))),
expected= round(sir_tot_res$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_res, phi= extract_phi(c_SISTRAT_c1_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_res$EAR)),
phi=extract_phi(c_SISTRAT_c1_res))
sir_tot_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_illicit_df<-
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_illicit$observed,0),
pyrs= round(sir_tot_illicit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_illicit$observed, sir_tot_illicit$pyrs, phi= 1))))),
expected= round(sir_tot_illicit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_illicit, phi= extract_phi(c_SISTRAT_c1_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_illicit$EAR)),
phi=extract_phi(c_SISTRAT_c1_illicit))
sir_tot_licit<- popEpi::sir( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_licit_df<-
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit$observed,0),
pyrs= round(sir_tot_licit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit$observed, sir_tot_licit$pyrs, phi= 1))))),
expected= round(sir_tot_licit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit, phi= extract_phi(c_SISTRAT_c1_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit$EAR)),
phi=extract_phi(c_SISTRAT_c1_licit))
sir_tot_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_comp_df<-
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_comp$observed,0),
pyrs= round(sir_tot_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_comp$observed, sir_tot_comp$pyrs, phi= 1))))),
expected= round(sir_tot_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_comp, phi= extract_phi(c_SISTRAT_c1_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_comp$EAR)),
phi=extract_phi(c_SISTRAT_c1_comp))
sir_tot_not_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_not_comp_df<-
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_not_comp$observed,0),
pyrs= round(sir_tot_not_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_not_comp$observed, sir_tot_not_comp$pyrs, phi= 1))))),
expected= round(sir_tot_not_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_not_comp, phi= extract_phi(c_SISTRAT_c1_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_not_comp$EAR)),
phi=extract_phi(c_SISTRAT_c1_not_comp))SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile)
Total sir: 3.59 (3.46-3.72)
Total observed: 2996
Total expected: 834.72
Total person-years: 353826
observed expected pyrs sir sir.lo sir.hi p_value EAR
<num> <num> <num> <num> <num> <num> <num> <num>
1: 2996 834.72 353826 3.59 3.46 3.72 0 6.108
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile)
Test for homogeneity: p < 0.001
Total sir: 3.59 (3.46-3.72)
Total observed: 2996
Total expected: 834.72
Total person-years: 353826
Clave <fot>
fot observed expected pyrs sir sir.lo sir.hi p_value EAR
<num> <num> <num> <num> <num> <num> <num> <num> <num>
1: 0.0000 49 5.66 2700.92 8.66 6.46 11.32 0 16.047
2: 0.0386 135 30.31 14408.71 4.45 3.74 5.25 0 7.266
3: 0.2465 135 36.67 17287.71 3.68 3.09 4.34 0 5.688
4: 0.5000 239 71.96 33364.72 3.32 2.92 3.76 0 5.006
5: 1.0000 1028 264.11 116421.81 3.89 3.66 4.14 0 6.561
6: 3.0000 696 205.71 84817.22 3.38 3.14 3.64 0 5.780
7: 5.0000 452 133.75 52809.05 3.38 3.08 3.70 0 6.026
8: 7.0000 209 69.15 26021.20 3.02 2.63 3.45 0 5.374
9: 9.0000 53 17.40 5994.64 3.05 2.30 3.94 0 5.939
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_df, arrange(sr_1_sex_df,desc(sex)), sr_1_age_df, sir_tot_amb_df, sir_tot_res_df, sir_tot_illicit_df, sir_tot_licit_df, sir_tot_comp_df, sir_tot_not_comp_df)|>
rename("Characteristic"="total")|>
mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
is.na(Characteristic)& sex=="Male"~"Male",
is.na(Characteristic)& grepl("18",agegroup)~"18-29",
is.na(Characteristic)& grepl("30",agegroup)~"30-44",
is.na(Characteristic)& grepl("45",agegroup)~"45-59",
is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
))|>
(\(df) {
df->> df_smr_ind
df
})()|>
dplyr::select(-sex, -agegroup)|>
extract(
SMR,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group")Dispersion-corrected 95% confidence intervals
| Characteristic | observed | pyrs | CMR_1000 | expected | EAR | phi | SMR_dir |
|---|---|---|---|---|---|---|---|
| Overall | 2996 | 353826 | 8.5 (8.2–8.8) | 835 | 6.11 | 3.1764435 | 3.59 (3.37–3.83) |
| Male | 2396 | 268325 | 8.9 (8.6–9.3) | 725 | 6.23 | 1.6149215 | 3.30 (3.14–3.48) |
| Female | 600 | 85501 | 7.0 (6.5–7.6) | 110 | 5.73 | 1.3469335 | 5.47 (4.98–6.00) |
| 18-29 | 224 | 77125 | 2.9 (2.5–3.3) | 65 | 2.06 | 3.1091828 | 3.44 (2.73–4.33) |
| 30-44 | 1070 | 183191 | 5.8 (5.5–6.2) | 276 | 4.33 | 5.6933881 | 3.88 (3.36–4.47) |
| 45-59 | 1343 | 82119 | 16.4 (15.5–17.3) | 374 | 11.80 | 2.0250692 | 3.59 (3.33–3.87) |
| 60+ | 359 | 11391 | 31.5 (28.4–34.9) | 119 | 21.04 | 1.3781331 | 3.01 (2.67–3.40) |
| Ambulatory | 2488 | 302963 | 8.2 (7.9–8.5) | 728 | 5.81 | 1.1859841 | 3.42 (3.27–3.57) |
| Residential | 508 | 50863 | 10.0 (9.2–10.9) | 107 | 7.89 | 0.8515432 | 4.76 (4.39–5.15) |
| Illicit | 1254 | 246046 | 5.1 (4.8–5.4) | 467 | 3.20 | 1.7595626 | 2.68 (2.49–2.89) |
| Licit | 1742 | 107780 | 16.2 (15.4–16.9) | 367 | 12.76 | 1.0611805 | 4.74 (4.52–4.98) |
| Completed | 671 | 83248 | 8.1 (7.5–8.7) | 240 | 5.18 | 0.8834808 | 2.80 (2.60–3.00) |
| Not completed | 2325 | 270578 | 8.6 (8.3–8.9) | 595 | 6.39 | 1.1754916 | 3.91 (3.74–4.09) |
Code
cat("Differences by sex in unadjusted CMRs (not phi inflated)\n")
rbind.data.frame(
cbind.data.frame(cat= "Sex (ref= female)", IRR= fisher.test(matrix(c(2396,600,268325,85501),2,2))$estimate[[1]], lower= fisher.test(matrix(c(2396,600,268325,85501),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(2396,600,268325,85501),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Tr. setting (ref=not completed)", IRR= fisher.test(matrix(c(2488,508,302963,50863),2,2))$estimate[[1]], lower= fisher.test(matrix(c(2488,508,302963,50863),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(2488,508,302963,50863),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Tr. compliance (ref= residential)", IRR= fisher.test(matrix(c(671,2325,83248,270578),2,2))$estimate[[1]], lower= fisher.test(matrix(c(671,2325,83248,270578),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(671,2325,83248,270578),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Primary subs.(ref= licit)", IRR= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$estimate[[1]], lower= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$conf.int[[2]])
) |> knitr::kable("markdown", caption="Exact test for IRRs (crude CMRs)", digits=2)Differences by sex in unadjusted CMRs (not phi inflated)
| cat | IRR | lower | upper |
|---|---|---|---|
| Sex (ref= female) | 1.27 | 1.16 | 1.39 |
| Tr. setting (ref=not completed) | 0.82 | 0.75 | 0.91 |
| Tr. compliance (ref= residential) | 0.94 | 0.86 | 1.02 |
| Primary subs.(ref= licit) | 0.32 | 0.29 | 0.34 |
We replicated the analysis for the last available treatment.
Code
sir_tot_b<-
sir( coh.data = c_SISTRAT_c1_b, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_b
sr_1_sex_b <- popEpi::sir(c_SISTRAT_c1_b, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("sex"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
sr_1_age_b <- popEpi::sir(c_SISTRAT_c1_b, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("agegroup"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_b_df<-
cbind.data.frame(
total= "Overall",
observed= round(sir_tot_b$observed,0),
pyrs= round(sir_tot_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b$observed, sir_tot_b$pyrs, phi=1))))),
expected= round(sir_tot_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b, phi=dispersion_index_b)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b$EAR)),
phi= dispersion_index_b)
# Run analysis
sr_1_sex_b_df <- sir_cmr_subgroup(
df = df_glm_b,
group_var = "sex"
)
sr_1_sex_b_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_sex_b$observed, sr_1_sex_b$pyrs, phi=1))))
sr_1_age_b_df <- sir_cmr_subgroup(
df = df_glm_b,
group_var = "agegroup"
)
sr_1_age_b_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_age_b$observed, sr_1_age_b$pyrs, phi=1))))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Now by strata")
sir_tot_amb_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_amb_b_df<-
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_amb_b$observed,0),
pyrs= round(sir_tot_amb_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_amb_b$observed, sir_tot_amb_b$pyrs, phi= 1))))),
expected= round(sir_tot_amb_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_amb_b, phi= extract_phi(c_SISTRAT_c1_b_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_amb_b$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_amb))
sir_tot_res_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_res_b_df<-
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_res_b$observed,0),
pyrs= round(sir_tot_res_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_res_b$observed, sir_tot_res_b$pyrs, phi= 1))))),
expected= round(sir_tot_res_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_res_b, phi= extract_phi(c_SISTRAT_c1_b_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_res_b$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_res))
sir_tot_b_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_b_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_illicit_b_df<-
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_b_illicit$observed,0),
pyrs= round(sir_tot_b_illicit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_illicit$observed, sir_tot_b_illicit$pyrs, phi= 1))))),
expected= round(sir_tot_b_illicit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_illicit, phi= extract_phi(c_SISTRAT_c1_b_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_illicit$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_illicit))
sir_tot_licit_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_licit_b_df<-
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit_b$observed,0),
pyrs= round(sir_tot_licit_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit_b$observed, sir_tot_licit_b$pyrs, phi= 1))))),
expected= round(sir_tot_licit_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit_b, phi= extract_phi(c_SISTRAT_c1_b_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit_b$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_licit))
sir_tot_b_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_b_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_comp_b_df<-
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_b_comp$observed,0),
pyrs= round(sir_tot_b_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_comp$observed, sir_tot_b_comp$pyrs, phi= 1))))),
expected= round(sir_tot_b_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_comp, phi= extract_phi(c_SISTRAT_c1_b_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_comp$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_comp))
sir_tot_b_not_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_b_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_not_comp_b_df<-
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_b_not_comp$observed,0),
pyrs= round(sir_tot_b_not_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_not_comp$observed, sir_tot_b_not_comp$pyrs, phi= 1))))),
expected= round(sir_tot_b_not_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_not_comp, phi= extract_phi(c_SISTRAT_c1_b_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_not_comp$EAR)),
phi=extract_phi(c_SISTRAT_c1_b_not_comp))SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile)
Total sir: 3.96 (3.82-4.1)
Total observed: 3029
Total expected: 765.8
Total person-years: 317628
observed expected pyrs sir sir.lo sir.hi p_value EAR
<num> <num> <num> <num> <num> <num> <num> <num>
1: 3029 765.8 317627.6 3.96 3.82 4.1 0 7.125
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_b_df, arrange(sr_1_sex_b_df,desc(sex)), sr_1_age_b_df, sir_tot_amb_b_df, sir_tot_res_b_df, sir_tot_illicit_b_df, sir_tot_licit_b_df, sir_tot_comp_b_df, sir_tot_not_comp_b_df)|>
rename("Characteristic"="total")|>
mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
is.na(Characteristic)& sex=="Male"~"Male",
is.na(Characteristic)& grepl("18",agegroup)~"18-29",
is.na(Characteristic)& grepl("30",agegroup)~"30-44",
is.na(Characteristic)& grepl("45",agegroup)~"45-59",
is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
))|>
dplyr::select(-sex, -agegroup)|>
(\(df) {
df->> df_smr_ind_b
df
})()|>
extract(
SMR,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group, last treatment available")Dispersion-corrected 95% confidence intervals
| Characteristic | observed | pyrs | CMR_1000 | expected | EAR | phi | SMR_dir |
|---|---|---|---|---|---|---|---|
| Overall | 3029 | 317628 | 9.5 (9.2–9.9) | 766 | 7.12 | 3.8986658 | 3.96 (3.69–4.24) |
| Male | 2425 | 242583 | 10.0 (9.6–10.4) | 666 | 7.25 | 2.5908006 | 3.64 (3.42–3.88) |
| Female | 604 | 75044 | 8.0 (7.4–8.7) | 100 | 6.71 | 1.7020223 | 6.03 (5.43–6.69) |
| 18-29 | 222 | 66757 | 3.3 (2.9–3.8) | 57 | 2.47 | 3.6155528 | 3.90 (3.04–5.01) |
| 30-44 | 1097 | 163996 | 6.7 (6.3–7.1) | 248 | 5.18 | 6.4658487 | 4.42 (3.80–5.14) |
| 45-59 | 1351 | 75900 | 17.8 (16.9–18.8) | 346 | 13.24 | 2.6600062 | 3.90 (3.58–4.26) |
| 60+ | 359 | 10975 | 32.7 (29.5–36.3) | 115 | 22.26 | 1.4270522 | 3.13 (2.77–3.54) |
| Ambulatory | 2452 | 268495 | 9.1 (8.8–9.5) | 661 | 6.67 | 1.1849895 | 3.71 (3.55–3.87) |
| Residential | 577 | 49132 | 11.7 (10.8–12.7) | 105 | 9.61 | 0.7615739 | 5.49 (5.11–5.90) |
| Illicit | 1242 | 218996 | 5.7 (5.4–6.0) | 425 | 3.73 | 1.7017815 | 2.93 (2.72–3.15) |
| Licit | 1787 | 98631 | 18.1 (17.3–19.0) | 341 | 14.66 | 1.2348696 | 5.24 (4.97–5.51) |
| Completed | 716 | 81226 | 8.8 (8.2–9.5) | 237 | 5.90 | 0.6639505 | 3.02 (2.85–3.21) |
| Not completed | 2313 | 236402 | 9.8 (9.4–10.2) | 529 | 7.55 | 1.2548334 | 4.37 (4.18–4.58) |
We replicated the analysis for patients in unfinished treatments
Code
sir_tot_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_c
sr_1_c_sex <- popEpi::sir(c_SISTRAT_c1_c, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("sex"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
sr_1_c_age <- popEpi::sir(c_SISTRAT_c1_c, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("agegroup"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_c_df<-
cbind.data.frame(
total= "Overall",
observed= round(sir_tot_c$observed,0),
pyrs= round(sir_tot_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c$observed, sir_tot_c$pyrs, phi=1))))),
expected= round(sir_tot_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c, phi=dispersion_index_c)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c$EAR)),
phi=dispersion_index_c)
# Run analysis
sr_1_sex_c_df <- sir_cmr_subgroup(
df = df_glm_c,
group_var = "sex"
)
sr_1_sex_c_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_c_sex$observed, sr_1_c_sex$pyrs, phi=1))))
sr_1_age_c_df <- sir_cmr_subgroup(
df = df_glm_c,
group_var = "agegroup"
)
sr_1_age_c_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_c_age$observed, sr_1_c_age$pyrs, phi=1))))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Now by strata")
sir_tot_c_amb<- popEpi::sir( coh.data = c_SISTRAT_c1_c_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_amb_c_df<-
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_c_amb$observed,0),
pyrs= round(sir_tot_c_amb$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c_amb$observed, sir_tot_c_amb$pyrs, phi= 1))))),
expected= round(sir_tot_c_amb$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c_amb, phi= extract_phi(c_SISTRAT_c1_c_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c_amb$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_amb))
sir_tot_c_res<- popEpi::sir( coh.data = c_SISTRAT_c1_c_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_res_c_df<-
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_c_res$observed,0),
pyrs= round(sir_tot_c_res$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c_res$observed, sir_tot_c_res$pyrs, phi= 1))))),
expected= round(sir_tot_c_res$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c_res, phi= extract_phi(c_SISTRAT_c1_c_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c_res$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_res))
sir_tot_illicit_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_illicit_c_df<-
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_illicit_c$observed,0),
pyrs= round(sir_tot_illicit_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_illicit_c$observed, sir_tot_illicit_c$pyrs, phi= 1))))),
expected= round(sir_tot_illicit_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_illicit_c, phi= extract_phi(c_SISTRAT_c1_c_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_illicit_c$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_illicit))
sir_tot_licit_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_licit_c_df<-
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit_c$observed,0),
pyrs= round(sir_tot_licit_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit_c$observed, sir_tot_licit_c$pyrs, phi= 1))))),
expected= round(sir_tot_licit_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit_c, phi= extract_phi(c_SISTRAT_c1_c_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit_c$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_licit))
sir_tot_comp_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_comp_c_df<-
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_comp_c$observed,0),
pyrs= round(sir_tot_comp_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_comp_c$observed, sir_tot_comp_c$pyrs, phi= 1))))),
expected= round(sir_tot_comp_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_comp_c, phi= extract_phi(c_SISTRAT_c1_c_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_comp_c$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_comp))
sir_tot_not_comp_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_not_comp_c_df<-
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_not_comp_c$observed,0),
pyrs= round(sir_tot_not_comp_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_not_comp_c$observed, sir_tot_not_comp_c$pyrs, phi= 1))))),
expected= round(sir_tot_not_comp_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_not_comp_c, phi= extract_phi(c_SISTRAT_c1_c_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_not_comp_c$EAR)),
phi=extract_phi(c_SISTRAT_c1_c_not_comp))SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile)
Total sir: 3.93 (3.81-4.06)
Total observed: 3817
Total expected: 970.26
Total person-years: 416286
observed expected pyrs sir sir.lo sir.hi p_value EAR
<num> <num> <num> <num> <num> <num> <num> <num>
1: 3817 970.26 416285.6 3.93 3.81 4.06 0 6.838
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_c_df, arrange(sr_1_sex_c_df,desc(sex)), sr_1_age_c_df, sir_tot_amb_c_df, sir_tot_res_c_df, sir_tot_illicit_c_df, sir_tot_licit_c_df, sir_tot_comp_c_df, sir_tot_not_comp_c_df)|>
rename("Characteristic"="total")|>
mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
is.na(Characteristic)& sex=="Male"~"Male",
is.na(Characteristic)& grepl("18",agegroup)~"18-29",
is.na(Characteristic)& grepl("30",agegroup)~"30-44",
is.na(Characteristic)& grepl("45",agegroup)~"45-59",
is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
))|>
(\(df) {
df->> df_smr_ind_c
df
})()|>
dplyr::select(-sex, -agegroup)|>
extract(
SMR,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. Unfinished treatments")Dispersion-corrected 95% confidence intervals
| Characteristic | observed | pyrs | CMR_1000 | expected | EAR | phi | SMR_dir |
|---|---|---|---|---|---|---|---|
| Overall | 3817 | 416286 | 9.2 (8.9–9.5) | 970 | 6.84 | 3.4353379 | 3.93 (3.71–4.17) |
| Male | 3012 | 310088 | 9.7 (9.4–10.1) | 833 | 7.03 | 1.4783122 | 3.62 (3.46–3.78) |
| Female | 805 | 106197 | 7.6 (7.1–8.1) | 137 | 6.29 | 1.4002056 | 5.86 (5.40–6.36) |
| 18-29 | 283 | 91929 | 3.1 (2.7–3.5) | 77 | 2.24 | 3.0062893 | 3.67 (3.00–4.49) |
| 30-44 | 1360 | 215034 | 6.3 (6.0–6.7) | 321 | 4.83 | 5.9053667 | 4.23 (3.72–4.81) |
| 45-59 | 1710 | 96010 | 17.8 (17.0–18.7) | 434 | 13.29 | 3.2505793 | 3.94 (3.62–4.29) |
| 60+ | 464 | 13312 | 34.9 (31.8–38.2) | 138 | 24.49 | 1.0085170 | 3.36 (3.07–3.68) |
| Ambulatory | 3163 | 355445 | 8.9 (8.6–9.2) | 842 | 6.53 | 1.0588197 | 3.75 (3.62–3.89) |
| Residential | 654 | 60841 | 10.7 (10.0–11.6) | 128 | 8.65 | 0.8869265 | 5.12 (4.76–5.50) |
| Illicit | 1577 | 291633 | 5.4 (5.1–5.7) | 550 | 3.52 | 1.6430539 | 2.87 (2.69–3.06) |
| Licit | 2240 | 124653 | 18.0 (17.2–18.7) | 421 | 14.60 | 0.9672190 | 5.32 (5.11–5.55) |
| Completed | 1341 | 145632 | 9.2 (8.7–9.7) | 375 | 6.63 | 0.7494584 | 3.57 (3.41–3.74) |
| Not completed | 2476 | 270654 | 9.1 (8.8–9.5) | 595 | 6.95 | 1.0806766 | 4.16 (3.99–4.34) |
Code
# ==============================================================
# Aggregated Sir-Spline optimiser (for c_SISTRAT_c1)
# ==============================================================
sirspline_agg <- function(
data, # cohort data, already aggregated
obs.var = "observed", # column with case counts
pyrs.var = "pyrs", # column with person-years
ref, # data.frame with expected rates
rate.var = "haz", # column in ref containing rate
by.vars = c("agegroup","year","sex"), # merge variables
spline.vars = c("agegroup","year"), # timescales to spline
df.grid = list(agegroup = 2:5, # candidate df
year = 2:5),
interaction = FALSE, # include ns(age) : ns(year) ?
adjust = NULL, # linear covariates
strata = NULL, # factor strata
knot.weight = "pyrs", # "pyrs" (=weighted) or "equal"
family = poisson,
control = glm.control(maxit = 100),
verbose = TRUE) {
stopifnot(requireNamespace("splines", quietly = TRUE))
# -------- 0 harmonise column names ----------------------------------
names(data)[ match(obs.var, names(data)) ] <- "OBS"
names(data)[ match(pyrs.var, names(data)) ] <- "PYRS"
names(ref )[ match(rate.var, names(ref )) ] <- "RATE"
# -------- 1 merge expected rates & build expected -------------------
dat <- merge(data, ref, by = by.vars, all.x = TRUE)
dat$EXPECTED <- with(dat, PYRS * RATE)
dat <- dat[ complete.cases(dat$EXPECTED) & dat$EXPECTED > 0 , ]
if(!nrow(dat)) stop("All rows removed – no positive expected values.")
# make sure spline variables are numeric
for(v in spline.vars) dat[[v]] <- as.numeric(as.character(dat[[v]]))
# -------- 2 build grid of df combinations ---------------------------
df.df <- expand.grid(df.grid, KEEP.OUT.ATTRS = FALSE)
names(df.df) <- spline.vars
# -------- 3 storage --------------------------------------------------
fits <- vector("list", nrow(df.df))
stats <- data.frame()
# -------- 4 loop over grid ------------------------------------------
for(i in seq_len(nrow(df.df))) {
dfs <- unlist(df.df[i, ], use.names = TRUE)
# ---- 4·1 spline terms ---------------------------------------------
splTerms <- mapply(function(v, k) {
x <- dat[[v]]
q <- if(knot.weight == "pyrs") {
# person-time weighted quantiles
rep(x, times = round(dat$PYRS)) # integer weights OK
} else x
knots <- stats::quantile(q,
probs = seq(0,1,length.out = k+1L),
type = 1, na.rm = TRUE)
knots <- unique(knots)
bd <- range(knots); inKnot <- setdiff(knots, bd)
if(length(inKnot))
sprintf("splines::ns(%s, knots=c(%s), Boundary.knots=c(%s))",
v, paste(inKnot, collapse=","), paste(bd,collapse=","))
else
sprintf("splines::ns(%s, Boundary.knots=c(%s))",
v, paste(bd, collapse=","))
}, v = spline.vars, k = dfs, SIMPLIFY = TRUE, USE.NAMES = FALSE)
rhs <- c(adjust, splTerms)
if(interaction && length(spline.vars) == 2)
rhs <- c(rhs,
sprintf("(%s):(%s)", splTerms[1], splTerms[2]))
if(length(strata))
rhs <- c(rhs, sprintf("factor(%s)", strata))
frm <- reformulate(rhs, response = "OBS")
# ---- 4·2 fit -------------------------------------------------------
fit <- tryCatch(
glm(frm, data = dat,
offset = log(EXPECTED),
family = family,
control = control),
error = identity,
warning = identity)
ok <- inherits(fit,"glm") && fit$converged &&
all(is.finite(coef(fit)))
if(ok) {
fits[[i]] <- fit
stats <- rbind(stats,
data.frame(row = i,
df_combo = paste(dfs, collapse=","),
AIC = stats::AIC(fit),
BIC = stats::BIC(fit),
n_par = length(coef(fit)),
warning = "",
stringsAsFactors = FALSE))
} else {
stats <- rbind(stats,
data.frame(row = i,
df_combo = paste(dfs, collapse=","),
AIC = NA, BIC = NA,
n_par = NA,
warning = conditionMessage(fit),
stringsAsFactors = FALSE))
if(verbose)
message("skip ", i, ": ", conditionMessage(fit))
}
}
stats <- stats[order(stats$AIC), ]
out <- list(table = stats, models = fits)
class(out) <- "sirAgg"
out
}
print.sirAgg <- function(x, n = 10, ...) {
cat("Top models by AIC\n")
print(utils::head(x$table, n = n), row.names = FALSE)
invisible(x)
}
sirspline_AIC_optimizer <- function(coh.data, coh.obs, coh.pyrs, ref.data, ref.rate,
spline, knots_list, adjust = NULL) {
# Convert to data frames
coh.data <- as.data.frame(coh.data)
ref.data <- as.data.frame(ref.data)
# Remove spline variables from adjust if present
if (!is.null(adjust)) {
adjust <- setdiff(adjust, spline)
if (length(adjust) == 0) adjust <- NULL
}
# Identify all grouping variables
grouping_vars <- unique(c(spline, adjust))
# 1. Aggregate cohort data
agg_fun <- function(df) {
aggregate(list(observed = df[[coh.obs]],
pyrs = df[[coh.pyrs]]),
by = df[grouping_vars],
FUN = sum, na.rm = TRUE)
}
coh_agg <- agg_fun(coh.data)
# 2. Merge with reference data
data <- merge(coh_agg, ref.data, by = grouping_vars)
# 3. Calculate expected cases
data$expected <- data$pyrs * data[[ref.rate]]
# 4. Clean data - remove rows with missing values
complete_cases <- complete.cases(data[, c(grouping_vars, "observed", "expected")])
data <- data[complete_cases, ]
# 5. Convert spline variables to numeric
for (var in spline) {
data[[var]] <- as.numeric(as.character(data[[var]]))
}
# 6. Prepare results storage
results <- data.frame()
# 7. Fit models for each knot combination
for (i in seq_along(knots_list)) {
k_counts <- knots_list[[i]]
names(k_counts) <- spline # Ensure names match spline variables
# Skip invalid combinations
if (any(k_counts < 2)) {
message("Skipping combination ", i, " (", paste(k_counts, collapse = ","),
") - knots must be >=2")
next
}
# Create spline terms
spline_terms <- sapply(spline, function(var) {
# Calculate knots
knots <- quantile(data[[var]], probs = seq(0, 1, length.out = k_counts[var]),
na.rm = TRUE)
# Boundary and internal knots
boundary <- knots[c(1, length(knots))]
internal <- knots[-c(1, length(knots))]
# Create spline term
if (length(internal) == 0) {
paste0("splines::ns(", var, ", Boundary.knots = c(",
paste(boundary, collapse = ","), "))")
} else {
paste0("splines::ns(", var, ", knots = c(",
paste(internal, collapse = ","), "), Boundary.knots = c(",
paste(boundary, collapse = ","), "))")
}
})
# Create formula
rhs_terms <- c(adjust, spline_terms)
rhs <- paste(rhs_terms, collapse = " + ")
formula <- as.formula(paste("observed ~", rhs))
# Fit model
model <- tryCatch({
glm(formula,
data = data,
offset = log(expected),
family = poisson)
}, error = function(e) {
message("Model failed for combination ", i, " (", paste(k_counts, collapse = ","),
"): ", e$message)
return(NULL)
})
# Skip if model failed
if (is.null(model)) next
# Store results
results <- rbind(results, data.frame(
combination = i,
knots = paste(k_counts, collapse = ","),
AIC = AIC(model),
stringsAsFactors = FALSE
))
}
# Return results sorted by AIC
if (nrow(results) > 0) {
results[order(results$AIC), ]
} else {
message("No valid models were fitted")
results
}
}
# Define knot combinations to test
knots_combinations <- list(
c(agegroup = 1, year = 2),
c(agegroup = 1, year = 3),
c(agegroup = 1, year = 4),
c(agegroup = 1, year = 5),
c(agegroup = 2, year = 2),
c(agegroup = 2, year = 3),
c(agegroup = 2, year = 4),
c(agegroup = 2, year = 5),
c(agegroup = 3, year = 3),
c(agegroup = 3, year = 4),
c(agegroup = 4, year = 3),
c(agegroup = 4, year = 4),
c(agegroup = 5, year = 4),
c(agegroup = 5, year = 3),
c(agegroup = 5, year = 2),
c(agegroup = 5, year = 1)
)
# Run the function (remove spline variables from adjust)
aic_results <- sirspline_AIC_optimizer(
coh.data = c_SISTRAT_c1,
coh.obs = "from0to1",
coh.pyrs = "pyrs",
ref.data = mx_1x1_banded,
ref.rate = "haz",
spline = c("agegroup", "year"),
knots_list = knots_combinations,
adjust = "sex" # Only non-spline variables
)
# Print results
print(aic_results)
# 2. Call the optimiser
opt_sirspline <- sirspline_agg(
data = c_SISTRAT_c1,
obs.var = "from0to1",
pyrs.var = "pyrs",
ref = mx_1x1_banded,
rate.var = "haz",
by.vars = c("agegroup","year","sex"),
spline.vars= c("agegroup","year"),
df.grid = list(agegroup = 1:5, year = 1:5),
interaction= TRUE, # set FALSE if you only want additive terms
adjust = c("sex"), # e.g. "sex" if you add it linearly
strata = NULL) # free baseline for each sex
print(opt_sirspline) # shows the best 10 models by AIC
# 3. Grab the best converged model
best <- opt_sirspline$models[[ opt_sirspline$table$row[1] ]]
#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"
#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"
#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"
st_1 <- sirspline( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'),
knots = c(3, 3), # 3 for age and 3 for year
reference.points = c(2010),
dependent.splines = TRUE)
st_1_lines<- extract_spline_data(st_1)
#plot(st_1, col=4, log=TRUE)
#title('Splines are dependent')
st_1_ind <- sirspline( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines<- extract_spline_data(st_1_ind)
#plot(st_1_ind, col=4, log=TRUE)
#title('Splines are independent')
psych::describeBy(st_1_lines, group="spline")
# Descriptive statistics by group
# spline: 1
# vars n mean sd median trimmed mad min max range skew kurtosis se
# spline 1 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
# spline_value 2 100 39.00 12.31 39.00 39.00 15.72 18.00 60.00 42.00 0.00 -1.24 1.23
# level 3 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
# estimate 4 100 3.59 0.37 3.66 3.62 0.45 2.72 4.03 1.31 -0.54 -0.88 0.04
# lower_ci 5 100 3.27 0.44 3.37 3.30 0.52 2.41 3.78 1.37 -0.47 -1.17 0.04
# upper_ci 6 100 3.94 0.34 4.04 3.99 0.26 3.04 4.29 1.25 -1.15 0.28 0.03
# ------------------------------------------------------------------------------------------------------------------------
# spline: 2
# vars n mean sd median trimmed mad min max range skew kurtosis se
# spline 1 100 2.00 0.00 2.00 2.00 0.00 2.00 2.00 0.00 NaN NaN 0.00
# spline_value 2 100 2015.00 2.93 2015.00 2015.00 3.74 2010.00 2020.00 10.00 0.00 -1.24 0.29
# level 3 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN 0.00
# estimate 4 100 1.17 0.08 1.19 1.18 0.09 1.00 1.27 0.27 -0.52 -0.99 0.01
# lower_ci 5 100 0.98 0.04 1.00 0.99 0.02 0.86 1.02 0.16 -1.36 0.61 0.00
# upper_ci 6 100 1.41 0.18 1.45 1.43 0.19 1.00 1.61 0.61 -0.71 -0.68 0.02
smrspline_data<-
rbind.data.frame(
cbind.data.frame(type="dependent",st_1_lines)#, cbind.data.frame(type="independent",st_1_ind_lines)
)%>%
group_by(spline) |>
mutate(
spline_value_normalized = (spline_value - min(spline_value)) / (max(spline_value) - min(spline_value))
) |>
ungroup() |>
mutate(spline= factor(spline, levels = c("year", "agegroup"), labels = c("Calendar year", "Age at discharge")))
cat("Plot")
ggplot(smrspline_data, aes(x = spline_value_normalized, y = estimate, color = spline, fill = spline))+ #
geom_line()+
geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci, color = spline, fill = spline), alpha = 0.3)+
scale_x_continuous(
name = "Calendar year", # Nombre de tu eje X
breaks = (seq(2010, 2020, by = 2) - 2010) / (2020 - 2010), # Define los quiebres cada 5 unidades
labels = paste0(seq(2010, 2020, by = 2), ""), # Etiquetas de los quiebres
sec.axis = sec_axis(~., breaks= (seq(18, 60, by = 5) - 18) / (60 - 18), labels = paste0(seq(18, 60, by = 5), ""), name= "Age at discharge")
)+
labs(x = "Spline Value", y = "SMR", color = "Variable", fill = "Variable")+
theme_sjPlot_manual()+
theme(legend.position="bottom")+
geom_hline(yintercept = 1, linetype = "dashed", color = "black")+#smallest value is the reference point (where SIR = 1)
theme(
legend.position = "bottom",
panel.grid = element_blank(), # Quitar grids
panel.background = element_blank(), # Fondo blanco sin líneas
#axis.text.x = element_text(angle = 45, hjust = 1), # Rotar etiquetas eje x 45 grados
strip.text = element_text(face = "bold", size=12)
)+
#scale_y_continuous(trans = "exp", breaks = c(1, 2, 3, 4, 5),
# name = "SMR") + # removes the need for readers to exponentiate
scale_color_manual(name = "Variable",
values = c("Calendar year"="grey40",
"Age at discharge"="#4DB3FF"))+
scale_fill_manual(name = "Variable", values = c("Calendar year"="gray60", "Age at discharge"="lightblue"))
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Fig2_SplineSMR.png"), dpi = 600, width = 7, height = 4.5) combination knots AIC
6 10 3,4 484.2027
5 9 3,3 485.1836
8 12 4,4 485.3776
9 13 5,4 485.3776
7 11 4,3 486.3777
10 14 5,3 486.3777
3 7 2,4 489.9243
2 6 2,3 490.8532
4 8 2,5 491.6667
11 15 5,2 491.9440
1 5 2,2 496.8448
Top models by AIC
row df_combo AIC BIC n_par warning
7 2,2 481.9883 506.7616 10
8 3,2 481.9883 506.7616 10
9 4,2 485.2628 517.4682 13
10 5,2 485.2628 517.4682 13
12 2,3 486.0548 518.2602 13
13 3,3 486.0548 518.2602 13
17 2,4 489.9686 529.6060 16
18 3,4 489.9686 529.6060 16
2 2,1 491.7196 509.0609 7
3 3,1 491.7196 509.0609 7
Descriptive statistics by group
spline: 1
vars n mean sd median trimmed mad min max range skew
spline 1 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
spline_value 2 100 39.00 12.31 39.00 39.00 15.72 18.00 60.00 42.00 0.00
level 3 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
estimate 4 100 3.59 0.37 3.66 3.62 0.45 2.72 4.03 1.31 -0.54
lower_ci 5 100 3.27 0.44 3.37 3.30 0.52 2.41 3.78 1.37 -0.47
upper_ci 6 100 3.94 0.34 4.04 3.99 0.26 3.04 4.29 1.25 -1.15
kurtosis se
spline NaN 0.00
spline_value -1.24 1.23
level NaN 0.00
estimate -0.88 0.04
lower_ci -1.17 0.04
upper_ci 0.28 0.03
------------------------------------------------------------
spline: 2
vars n mean sd median trimmed mad min max range
spline 1 100 2.00 0.00 2.00 2.00 0.00 2.00 2.00 0.00
spline_value 2 100 2015.00 2.93 2015.00 2015.00 3.74 2010.00 2020.00 10.00
level 3 100 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
estimate 4 100 1.17 0.08 1.19 1.18 0.09 1.00 1.27 0.27
lower_ci 5 100 0.98 0.04 1.00 0.99 0.02 0.86 1.02 0.16
upper_ci 6 100 1.41 0.18 1.45 1.43 0.19 1.00 1.61 0.61
skew kurtosis se
spline NaN NaN 0.00
spline_value 0.00 -1.24 0.29
level NaN NaN 0.00
estimate -0.52 -0.99 0.01
lower_ci -1.36 0.61 0.00
upper_ci -0.71 -0.68 0.02
Plot
Code
# sir_tot_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
# ref.data = mx_1x1_banded,
# ref.rate = 'haz',
# adjust = c('agegroup','year','sex'),
# EAR=T)#Excess Absolute Risks
#
# sir_tot_licit<- popEpi::sir( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
# ref.data = mx_1x1_banded,
# ref.rate = 'haz',
# adjust = c('agegroup','year','sex'),
# EAR=T)#Excess Absolute Risks
st_1_illicit <- sirspline( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'), dependent.splines = TRUE)
st_1_lines_illicit<- extract_spline_data(st_1_illicit)
st_1_ind_illicit <- sirspline( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines_illicit<- extract_spline_data(st_1_ind_illicit)
st_1_licit <- sirspline( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'), dependent.splines = TRUE)
st_1_lines_licit<- extract_spline_data(st_1_licit)
st_1_ind_licit <- sirspline( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded, ref.rate = 'haz',
adjust = c('agegroup','year','sex'),
spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines_licit<- extract_spline_data(st_1_ind_licit)
smrspline_data_illicit<-
rbind.data.frame(
cbind.data.frame(subs="illicit", type="dependent",st_1_lines_illicit),
cbind.data.frame(subs="illicit", type="independent",st_1_ind_lines_illicit),
cbind.data.frame(subs="licit", type="dependent",st_1_lines_licit),
cbind.data.frame(subs="licit", type="independent",st_1_ind_lines_licit)
)
cat("Plot")
ggplot(smrspline_data_illicit, aes(x = spline_value, y = estimate, color = factor(type))) +
geom_line() +
geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci, fill = factor(type)), alpha = 0.3) +
facet_wrap(~ spline+subs, scales = "free_x", labeller = as_labeller(
c(agegroup = "Age at Discharge", year = "Calendar Year", illicit= "Illicit substances", licit= "Licit substances (alcohol)"))) +
labs(x = "Spline Value", y = "log(SMR)", color = "All splines in the same model", fill = "All splines in the same model") +
theme_sjPlot_manual()+
theme(legend.position="bottom")+
geom_hline(yintercept = 1, linetype = "dashed", color = "black")+#smallest value is the reference point (where SIR = 1)
theme(
legend.position = "bottom",
panel.grid = element_blank(), # Quitar grids
panel.background = element_blank(), # Fondo blanco sin líneas
axis.text.x = element_text(angle = 45, hjust = 1), # Rotar etiquetas eje x 45 grados
strip.text = element_text(face = "bold", size=12)
)+
scale_y_continuous(trans = "exp", breaks = c(2, 6, 8, 10),
name = "SMR") + # removes the need for readers to exponentiate
scale_color_manual(name = "Spline specification",
values = c("dependent"="grey40",
"independent"="#4DB3FF"))+
scale_fill_manual(name = "Spline specification", values = c("dependent"="gray60",
"independent"="lightblue"))
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Fig_SplineSMR_illicit.png"), dpi = 600, width = 7, height = 4.5)
prop.table(table(clean_df$disch_age_cat, clean_df$prim_sub_licit),1) |>
data.frame() |>
pivot_wider(names_from=Var2, values_from=Freq) |>
mutate(across(c("illicit", "licit"), ~sprintf("%.1f",.*100) )) |>
knitr::kable("markdown", caption="Percentages of primary substance of use, by discharge age group")Plot
| Var1 | illicit | licit |
|---|---|---|
| 18-29 | 83.1 | 16.9 |
| 30-44 | 68.2 | 31.8 |
| 45-59 | 36.4 | 63.6 |
| 60+ | 14.5 | 85.5 |
Age- and calendar-year patterns in standardised mortality ratios
Stantardized mortality rates- Direct
First, we formatted population data by calendar years grouped ages and sex. This allows us to introduce further the weights for each strata. We calculated the weights separately for each strata (WL), and jointly (proy_ine_reg_group_25_june_every).
Code
proy_ine_reg_group_25_june<-
proy_ine_com|>
#2025 to resemble SER 2024, BUT NOW 18+ INSTEAD OF 15
#2025-06-11: I need to expand so people older that stayed in treatment could fit
dplyr::filter(Edad>=18, Edad<76)|>
#format to match previous
dplyr::mutate(reg_res= sprintf("%02d", Region))|>
dplyr::ungroup()|>
dplyr::mutate(edad_anos_rec= dplyr::case_when(Edad>=18 & Edad<30~18,
Edad>=30 & Edad<45~30,
Edad>=45 & Edad<60~45,
Edad>=60 & Edad<76~60,
T~NA_real_))|>
dplyr::group_by(anio, `Sexo (1=Hombre 2=Mujer)`,edad_anos_rec)|>
dplyr::summarise(poblacion= sum(poblacion, na.rm=T))|>
dplyr::rename("sex"="Sexo (1=Hombre 2=Mujer)", "agegroup"="edad_anos_rec", "year"="anio")|>
dplyr::mutate(sex= ifelse(sex==2, "Female", "Male")) |>
ungroup()Code
# In the case that you employ more than one adjusting variable, separate weights should be passed to
# match to the levels of the different adjusting variables. When supplied correctly, "grand" weights
# are formed based on the variable-specific weights by multiplying over the variable-specific weights
# (e.g. if men have w = 0.5 and the age group 0-4 has w = 0.1, the "grand" weight for men aged 0-4
# is 0.5*0.1). The "grand" weights are then used for adjusting after ensuring they sum to one.
# When using multiple adjusting variables, you are allowed to pass either a named list of weights
# or a data.frame of weights. E.g.
# WL <- list(agegroup = age_w, sex = sex_w)
proy_ine_reg_group_25_june_sex<-
proy_ine_reg_group_25_june|>
#filter(year == 2020)|>
dplyr::summarise(pop = sum(poblacion), .by = sex) |>
mutate(w=pop/sum(pop))
proy_ine_reg_group_25_june_age<-
proy_ine_reg_group_25_june|>
#filter(year == 2020)|>
dplyr::summarise(pop = sum(poblacion), .by = agegroup) |>
mutate(w=pop/sum(pop))
proy_ine_reg_group_25_june_year<-
proy_ine_reg_group_25_june|>
#filter(year == 2020)|>
dplyr::summarise(pop = sum(poblacion), .by = year) |>
mutate(w=pop/sum(pop))
WL <- list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w,
sex = proy_ine_reg_group_25_june_sex$w)
proy_ine_reg_group_25_june_every<-
proy_ine_reg_group_25_june|>
group_by(year, agegroup, sex)|>
dplyr::summarise(pop = sum(poblacion))|>
ungroup()|>
mutate(weights=pop/sum(pop))Code
weights_df <- data.table::as.data.table(proy_ine_reg_group_25_june_every)[
, .(year = as.integer(year), # num o int
agegroup = as.integer(agegroup),
sex = factor(sex, levels = c("Male","Female")), # ¡factor!
weights = as.numeric(weights)) # num
]
weights_adj_sex <- data.table::as.data.table(weights_df)[ ,
.(w = sum(weights)), # collapse over sex
by = .(year, agegroup)
]
#weights_adj_sex[, sum(w)] # should be 1We calculate dispersion parameter
Code
extract_phi_dir <- function(df) {
# Model with weights vector
model_poisson_weighted <- glm(
from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
family = poisson,
offset = log(pyrs),
data = df
)
# Calculate φ
pearson_chisq <- sum(residuals(model_poisson_weighted, type = "pearson")^2)
df_residual <- df.residual(model_poisson_weighted)
dispersion_index <- pearson_chisq / df_residual
return(dispersion_index)
}
# Estimate φ with the same stratification used in the DSR, otherwise residual heterogeneity explodes the statistic.
# Use one φ per subgroup whenever you report subgroup-specific DSRs.
# Always include in the GLM every variable whose variation you do not want counted as dispersion.
# Here, I allow the mean to change with both calendar and age. However, residual degrees of freedom remain because you haven't included the interaction. To estimate overdispersion, I need some residual degrees of freedom: I'll either remove the interaction or just use the intercept within each group.
extract_phi_by_age <- function(df) {
df %>%
group_by(agegroup) %>%
dplyr::reframe({
# This entire block is now run for each group independently
# Fit the model ONLY on the data from the current group
m <- glm(from0to1 ~ factor(sex) + factor(year),
offset = log(pyrs),
family = poisson,
data = pick(everything())) # pick() correctly gets the group's data
# Return the result as a one-row tibble (or data.frame)
tibble(phi = sum(residuals(m, type = "pearson")^2) / df.residual(m))
})
}
extract_phi_by_sex <- function(df) {
df %>%
group_by(sex) %>%
dplyr::reframe({
# This entire block is now run for each group independently
# Fit the model ONLY on the data from the current group
m <- glm(from0to1 ~ factor(agegroup) + factor(year),
offset = log(pyrs),
family = poisson,
data = pick(everything())) # pick() correctly gets the group's data
# Return the result as a one-row tibble (or data.frame)
tibble(phi = sum(residuals(m, type = "pearson")^2) / df.residual(m))
})
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Test of stratified DSR using Lexis and glm")
## 1. joint weights = w_year * w_age (no sex weight)
w_year <- proy_ine_reg_group_25_june_year %>% # year | w
transmute(year, w_year = w)
w_age <- proy_ine_reg_group_25_june_age %>% # agegroup | w
transmute(agegroup, w_age = w)
w_tab<-
crossing(w_year, w_age)|>
dplyr::mutate(w = w_year * w_age)|>
dplyr::select(year, agegroup, w)|>
dplyr::mutate(w = w / sum(w))
## 2. merge with Lexis table
lexis_w <- merge(data.table::as.data.table(c_SISTRAT_c1), w_tab,
by = c("year", "agegroup"), all = FALSE)
## 3. saturated model (every cell gets its own parameter)
mod_sat <- glm(
from0to1 ~ sex:factor(year):factor(agegroup) - 1, # sin intercepto
offset = log(pyrs),
family = poisson,
data = lexis_w
)
## 4. fitted stratum-specific rates = observed rates
## 5. direct standardised rate and CI
## 6. present per 1000 person-years like popEpi::rate()
lexis_w <- lexis_w %>% mutate(r_hat = fitted(mod_sat) / pyrs)
dsr_tab <- lexis_w %>%
group_by(sex) %>%
summarise(
DSR = sum(w * r_hat), #weight the predictor
var = sum(w^2 * from0to1 / pyrs^2),
.groups = "drop"
) %>%
mutate(
SE = sqrt(var),
LCI = DSR * exp(-qnorm(.975) * SE / DSR),
UCI = DSR * exp( qnorm(.975) * SE / DSR)
) %>%
mutate(across(c(DSR, LCI, UCI), ~ .x * 1000)) # por 1 000 py
cat("The result should be the same of rate_sex\n")
#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:
## 1. joint weights = w_year * w_age (no sex weight)
w_sex <- proy_ine_reg_group_25_june_sex %>% # agegroup | w
transmute(sex, w_sex = w)
w_tab_agegroup<-
crossing(w_year, w_sex)|>
dplyr::mutate(w = w_year * w_sex)|>
dplyr::select(year, sex, w)|>
dplyr::mutate(w = w / sum(w))
## 2. merge with Lexis table
lexis_w_age <- merge(data.table::as.data.table(c_SISTRAT_c1), w_tab_agegroup,
by = c("year", "sex"), all = FALSE)
## 3. saturated model (every cell gets its own parameter)
mod_sat_age <- glm(
from0to1 ~ sex:factor(year):factor(agegroup) - 1, # sin intercepto
offset = log(pyrs),
family = poisson,
data = lexis_w_age
)
## 4. fitted stratum-specific rates = observed rates
## 5. direct standardised rate and CI
## 6. present per 1000 person-years like popEpi::rate()
lexis_w_age <- lexis_w_age %>% mutate(r_hat = fitted(mod_sat_age) / pyrs)
dsr_tab_age <- lexis_w_age %>%
group_by(agegroup) %>%
summarise(
DSR = sum(w * r_hat), #weight the predictor
var = sum(w^2 * from0to1 / pyrs^2),
.groups = "drop"
) %>%
mutate(
SE = sqrt(var),
LCI = DSR * exp(-qnorm(.975) * SE / DSR),
UCI = DSR * exp( qnorm(.975) * SE / DSR)
) %>%
mutate(across(c(DSR, LCI, UCI), ~ .x * 1000)) # por 1 000 py
cat("The result should be the same of rate_agegroup\n")Now we calculate the directly-standardized mortality rates.
Code
r2_adj <- rate(
data = c_SISTRAT_c1,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k <- mapply(
dsr_format, # FUN
r2_adj$rate.adj, # primer vector (rate)
r2_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_adj$rate.adj, # primer vector (rate)
r2_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj <- rate(
data = c_SISTRAT_c1,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
r2_adj_fot <- rate(
data = c_SISTRAT_c1_fot,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df, #weights inglm should be applied in the offset
print= "fot"
)
r2_sex_fot <- rate( data = c_SISTRAT_c1_fot,
obs = from0to1,
pyrs = pyrs,
print = c("sex","fot"),
adjust = c("year", "agegroup"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w
#sex = proy_ine_reg_group_25_june_sex$w)
)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sex
r2_sex <- rate( data = c_SISTRAT_c1,
obs = from0to1,
pyrs = pyrs,
print = "sex",
adjust = c("year", "agegroup"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w
#sex = proy_ine_reg_group_25_june_sex$w)
)
)
DSR_1k_sex <- mapply(
dsr_format, # FUN
r2_sex$rate.adj, # primer vector (rate)
r2_sex$SE.rate.adj, # segundo vector (se)
1,
MoreArgs = list( # argumentos fijos extra
#phi = 3.666913,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_1k_corr_sex <- mapply(
dsr_format_corr,
r2_sex$rate.adj,
r2_sex$SE.rate.adj,
phi = extract_phi_by_sex(c_SISTRAT_c1)$phi[match(r2_sex$sex, extract_phi_by_sex(c_SISTRAT_c1)$sex)],
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; age
r2_agegr <- rate( data = c_SISTRAT_c1,
obs = from0to1,
pyrs = pyrs,
print = "agegroup",
adjust = c("year", "sex"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
#agegroup = proy_ine_reg_group_25_june_age$w
sex = proy_ine_reg_group_25_june_sex$w)
)
DSR_1k_agegr <- mapply(
dsr_format, # FUN
r2_agegr$rate.adj, # primer vector (rate)
r2_agegr$SE.rate.adj, # segundo vector (se)
1,
MoreArgs = list( # argumentos fijos extra
#phi = 3.666913,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_1k_corr_agegr <- mapply(
dsr_format_corr,
r2_agegr$rate.adj,
r2_agegr$SE.rate.adj,
phi = extract_phi_by_age(c_SISTRAT_c1)$phi[match(r2_agegr$agegroup, extract_phi_by_age(c_SISTRAT_c1)$agegroup)], # right φ
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
r2_amb_adj <- rate(
data = c_SISTRAT_c1_amb,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_amb_1k <- mapply(
dsr_format, # FUN
r2_amb_adj$rate.adj, # primer vector (rate)
r2_amb_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_amb_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_amb_adj$rate.adj, # primer vector (rate)
r2_amb_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_amb),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_res_adj <- rate(
data = c_SISTRAT_c1_res,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_res_1k <- mapply(
dsr_format, # FUN
r2_res_adj$rate.adj, # primer vector (rate)
r2_res_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_res_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_res_adj$rate.adj, # primer vector (rate)
r2_res_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_res),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_illicit_adj <- rate(
data = c_SISTRAT_c1_illicit,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_illicit_1k <- mapply(
dsr_format, # FUN
r2_illicit_adj$rate.adj, # primer vector (rate)
r2_illicit_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_illicit_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_illicit_adj$rate.adj, # primer vector (rate)
r2_illicit_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_illicit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_licit_adj <- rate(
data = c_SISTRAT_c1_licit,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_licit_1k <- mapply(
dsr_format, # FUN
r2_licit_adj$rate.adj, # primer vector (rate)
r2_licit_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_licit_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_licit_adj$rate.adj, # primer vector (rate)
r2_licit_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_licit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_comp_adj <- rate(
data = c_SISTRAT_c1_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_comp_1k <- mapply(
dsr_format, # FUN
r2_comp_adj$rate.adj, # primer vector (rate)
r2_comp_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_comp_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_comp_adj$rate.adj, # primer vector (rate)
r2_comp_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_notcomp_adj <- rate(
data = c_SISTRAT_c1_not_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_notcomp_1k <- mapply(
dsr_format, # FUN
r2_notcomp_adj$rate.adj, # primer vector (rate)
r2_notcomp_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_notcomp_1k_corr <- mapply(
dsr_format_corr, # FUN
r2_notcomp_adj$rate.adj, # primer vector (rate)
r2_notcomp_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_not_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
cbind.data.frame(var="Total", t(r2_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_1k_corr),
cbind.data.frame(var=c("Male","Female"), matrix(r2_sex[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir=matrix(DSR_1k_corr_sex, ncol=1)),
cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir=matrix(DSR_1k_corr_agegr, ncol=1)),
cbind.data.frame(var="Ambulatory", t(r2_amb_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_amb_1k_corr),
cbind.data.frame(var="Residential", t(r2_res_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_res_1k_corr),
cbind.data.frame(var="Illicit", t(r2_illicit_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_illicit_1k_corr),
cbind.data.frame(var="Licit", t(r2_licit_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_licit_1k_corr),
cbind.data.frame(var="Completed", t(r2_comp_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_comp_1k_corr),
cbind.data.frame(var="Not completed", t(r2_notcomp_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_notcomp_1k_corr)#,
)|>
(\(df) {
df->> df_smr_dir_a
df
})()|>
dplyr::mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|>
dplyr::mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
dplyr::select(-any_of(2:7))|>
extract(
SMR_dir,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
dplyr::rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci") |>
knitr::kable("markdown", caption= "SMRs, direct method")| var | CMR | DSR | DSR (SEs robust to dispersion) |
|---|---|---|---|
| Total | 8.5 (8.2–8.8) | 13.1 (9.0–19.2) | 13.1 (8.8–19.5) |
| Male | 8.9 (8.6–9.3) | 18.5 (9.5–35.7) | 18.5 (8.1–42.0) |
| Female | 7.0 (6.5–7.6) | 9.1 (7.5–11.0) | 9.1 (7.7–10.6) |
| 18-29 | 2.9 (2.5–3.3) | 2.9 (2.4–3.4) | 2.9 (2.5–3.4) |
| 30-44 | 5.8 (5.5–6.2) | 5.7 (4.8–6.7) | 5.7 (4.7–6.9) |
| 45-59 | 16.4 (15.5–17.3) | 14.2 (12.9–15.7) | 14.2 (13.4–15.1) |
| 60+ | 31.5 (28.4–34.9) | 46.7 (20.7–105.4) | 46.7 (21.2–102.9) |
| Ambulatory | 8.2 (7.9–8.5) | 13.2 (9.0–19.3) | 13.2 (8.9–19.5) |
| Residential | 10.0 (9.2–10.9) | 10.4 (8.9–12.0) | 10.3 (9.1–11.8) |
| Illicit | 5.1 (4.8–5.4) | 12.1 (5.8–25.3) | 12.1 (4.6–31.3) |
| Licit | 16.2 (15.4–16.9) | 14.7 (12.8–16.8) | 14.7 (12.9–16.7) |
| Completed | 8.1 (7.5–8.7) | 8.0 (6.5–9.8) | 8.0 (6.6–9.6) |
| Not completed | 8.6 (8.3–8.9) | 15.2 (10.4–22.2) | 15.2 (10.4–22.2) |
CMRs do not have 95% CI corrected for dispersion.
We replicate the analysis for the last treatment.
Code
invisible("B) Last treatment")
r2_b_adj <- rate(
data = c_SISTRAT_c1_b,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_b_adj$rate.adj, # primer vector (rate)
r2_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b),
factor = 1e3,
digits = 6,
conf = 0.95))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; sex
r2_sex_b <- rate( data = c_SISTRAT_c1_b,
obs = from0to1,
pyrs = pyrs,
print = "sex",
adjust = c("year", "agegroup"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w
#sex = proy_ine_reg_group_25_june_sex$w)
)
)
DSR_1k_corr_sex_b <- mapply(
dsr_format_corr,
r2_sex_b$rate.adj,
r2_sex_b$SE.rate.adj,
phi = extract_phi_by_sex(c_SISTRAT_c1_b)$phi[match(r2_sex_b$sex, extract_phi_by_sex(c_SISTRAT_c1_b)$sex)],
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; age
r2_agegr_b <- rate( data = c_SISTRAT_c1_b,
obs = from0to1,
pyrs = pyrs,
print = "agegroup",
adjust = c("year", "sex"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
#agegroup = proy_ine_reg_group_25_june_age$w
sex = proy_ine_reg_group_25_june_sex$w)
)
DSR_1k_corr_agegr_b <- mapply(
dsr_format_corr,
r2_agegr_b$rate.adj,
r2_agegr_b$SE.rate.adj,
phi = extract_phi_by_age(c_SISTRAT_c1_b)$phi[match(r2_agegr_b$agegroup, extract_phi_by_age(c_SISTRAT_c1_b)$agegroup)], # right φ
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
r2_amb_b_adj <- rate(
data = c_SISTRAT_c1_b_amb,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_amb_1k_b <- mapply(
dsr_format, # FUN
r2_amb_b_adj$rate.adj, # primer vector (rate)
r2_amb_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_amb_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_amb_b_adj$rate.adj, # primer vector (rate)
r2_amb_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_amb),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_res_b_adj <- rate(
data = c_SISTRAT_c1_b_res,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_res_1k_b <- mapply(
dsr_format, # FUN
r2_res_b_adj$rate.adj, # primer vector (rate)
r2_res_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_res_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_res_b_adj$rate.adj, # primer vector (rate)
r2_res_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_res),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_illicit_b_adj <- rate(
data = c_SISTRAT_c1_b_illicit,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_illicit_1k_b <- mapply(
dsr_format, # FUN
r2_illicit_b_adj$rate.adj, # primer vector (rate)
r2_illicit_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_illicit_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_illicit_b_adj$rate.adj, # primer vector (rate)
r2_illicit_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_illicit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_licit_b_adj <- rate(
data = c_SISTRAT_c1_b_licit,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_licit_1k_b <- mapply(
dsr_format, # FUN
r2_licit_b_adj$rate.adj, # primer vector (rate)
r2_licit_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_licit_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_licit_b_adj$rate.adj, # primer vector (rate)
r2_licit_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_licit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_comp_b_adj <- rate(
data = c_SISTRAT_c1_b_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_comp_1k_b <- mapply(
dsr_format, # FUN
r2_comp_b_adj$rate.adj, # primer vector (rate)
r2_comp_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_comp_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_comp_b_adj$rate.adj, # primer vector (rate)
r2_comp_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_notcomp_b_adj <- rate(
data = c_SISTRAT_c1_b_not_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_notcomp_1k_b <- mapply(
dsr_format, # FUN
r2_notcomp_b_adj$rate.adj, # primer vector (rate)
r2_notcomp_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_notcomp_1k_b_corr <- mapply(
dsr_format_corr, # FUN
r2_notcomp_b_adj$rate.adj, # primer vector (rate)
r2_notcomp_b_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_b_not_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
cbind.data.frame(var="Total", t(r2_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_b_corr),
cbind.data.frame(var=c("Male","Female"), matrix(r2_sex_b[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_sex_b, ncol=1)),
cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr_b[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_agegr_b, ncol=1)),
cbind.data.frame(var="Ambulatory", t(r2_amb_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_amb_1k_b_corr),
cbind.data.frame(var="Residential", t(r2_res_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_res_1k_b_corr),
cbind.data.frame(var="Illicit", t(r2_illicit_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_illicit_1k_b_corr),
cbind.data.frame(var="Licit", t(r2_licit_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_licit_1k_b_corr),
cbind.data.frame(var="Completed", t(r2_comp_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_comp_1k_b_corr),
cbind.data.frame(var="Not completed", t(r2_notcomp_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_notcomp_1k_b_corr)
)|>
(\(df) {
df->> df_smr_dir_b
df
})()|>
mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|>
mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
dplyr::select(-any_of(2:7))|>
extract(
SMR_dir,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|>
knitr::kable("markdown", caption= "SMRs, direct method, last treatment followed")| var | CMR | DSR | DSR (SEs robust to dispersion) |
|---|---|---|---|
| Total | 9.5 (9.2–9.9) | 15.3 (10.1–23.1) | 15.3 (10.0–23.3) |
| Male | 10.0 (9.6–10.4) | 21.9 (10.8–44.6) | 21.9 (9.1–53.0) |
| Female | 8.0 (7.4–8.7) | 10.3 (8.4–12.5) | 10.3 (8.8–12.0) |
| 18-29 | 3.3 (2.9–3.8) | 3.6 (3.0–4.5) | 3.6 (3.1–4.3) |
| 30-44 | 6.7 (6.3–7.1) | 7.3 (5.8–9.0) | 7.3 (5.7–9.2) |
| 45-59 | 17.8 (16.9–18.8) | 16.4 (14.7–18.2) | 16.4 (15.3–17.4) |
| 60+ | 32.7 (29.5–36.3) | 53.3 (21.5–132.2) | 53.3 (24.1–117.7) |
| Ambulatory | 9.1 (8.8–9.5) | 15.1 (9.9–23.0) | 15.1 (9.8–23.1) |
| Residential | 11.7 (10.8–12.7) | 12.7 (10.8–14.9) | 12.7 (11.1–14.5) |
| Illicit | 5.7 (5.4–6.0) | 15.1 (6.4–35.9) | 15.1 (5.0–45.3) |
| Licit | 18.1 (17.3–19.0) | 17.1 (14.7–19.9) | 17.1 (14.7–20.0) |
| Completed | 8.8 (8.2–9.5) | 9.1 (7.3–11.4) | 9.1 (7.6–11.0) |
| Not completed | 9.8 (9.4–10.2) | 17.5 (11.6–26.5) | 17.5 (11.6–26.5) |
CMRs do not have 95% CI corrected for dispersion.
Now we included unfinished treatments (referral outside SENDA network and ongoing treatments)
Code
invisible("C) Last treatment")
r2_c_adj <- rate(
data = c_SISTRAT_c1_c,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_c_adj$rate.adj, # primer vector (rate)
r2_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c),
factor = 1e3,
digits = 6,
conf = 0.95))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; sex
r2_sex_c <- rate( data = c_SISTRAT_c1_c,
obs = from0to1,
pyrs = pyrs,
print = "sex",
adjust = c("year", "agegroup"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w
#sex = proy_ine_reg_group_25_june_sex$w)
)
)
DSR_1k_corr_sex_c <- mapply(
dsr_format_corr,
r2_sex_c$rate.adj,
r2_sex_c$SE.rate.adj,
phi = extract_phi_by_sex(c_SISTRAT_c1_c)$phi[match(r2_sex_c$sex, extract_phi_by_sex(c_SISTRAT_c1_c)$sex)],
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; age
r2_agegr_c <- rate( data = c_SISTRAT_c1_c,
obs = from0to1,
pyrs = pyrs,
print = "agegroup",
adjust = c("year", "sex"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
#agegroup = proy_ine_reg_group_25_june_age$w
sex = proy_ine_reg_group_25_june_sex$w)
)
DSR_1k_corr_agegr_c <- mapply(
dsr_format_corr,
r2_agegr_c$rate.adj,
r2_agegr_c$SE.rate.adj,
phi = extract_phi_by_age(c_SISTRAT_c1_c)$phi[match(r2_agegr_c$agegroup, extract_phi_by_age(c_SISTRAT_c1_c)$agegroup)], # right φ
MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
r2_amb_c_adj <- rate(
data = c_SISTRAT_c1_c_amb,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_amb_1k_c <- mapply(
dsr_format, # FUN
r2_amb_c_adj$rate.adj, # primer vector (rate)
r2_amb_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_amb_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_amb_c_adj$rate.adj, # primer vector (rate)
r2_amb_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_amb),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_res_c_adj <- rate(
data = c_SISTRAT_c1_c_res,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_res_1k_c <- mapply(
dsr_format, # FUN
r2_res_c_adj$rate.adj, # primer vector (rate)
r2_res_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_res_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_res_c_adj$rate.adj, # primer vector (rate)
r2_res_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_res),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_illicit_c_adj <- rate(
data = c_SISTRAT_c1_c_illicit,
obs = from0to1,
pyrs = pyrs,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_illicit_1k_c <- mapply(
dsr_format, # FUN
r2_illicit_c_adj$rate.adj, # primer vector (rate)
r2_illicit_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_illicit_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_illicit_c_adj$rate.adj, # primer vector (rate)
r2_illicit_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_illicit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_licit_c_adj <- rate(
data = c_SISTRAT_c1_c_licit,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_licit_1k_c <- mapply(
dsr_format, # FUN
r2_licit_c_adj$rate.adj, # primer vector (rate)
r2_licit_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_licit_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_licit_c_adj$rate.adj, # primer vector (rate)
r2_licit_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_licit),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_comp_c_adj <- rate(
data = c_SISTRAT_c1_c_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_comp_1k_c <- mapply(
dsr_format, # FUN
r2_comp_c_adj$rate.adj, # primer vector (rate)
r2_comp_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_comp_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_comp_c_adj$rate.adj, # primer vector (rate)
r2_comp_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#_______#_______#_______#_______#
r2_notcomp_c_adj <- rate(
data = c_SISTRAT_c1_c_not_comp,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df
)
DSR_notcomp_1k_c <- mapply(
dsr_format, # FUN
r2_notcomp_c_adj$rate.adj, # primer vector (rate)
r2_notcomp_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = 1,
factor = 1e3,
digits = 1,
conf = 0.95))
DSR_notcomp_1k_c_corr <- mapply(
dsr_format_corr, # FUN
r2_notcomp_c_adj$rate.adj, # primer vector (rate)
r2_notcomp_c_adj$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_c_not_comp),
factor = 1e3,
digits = 6,
conf = 0.95))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
cbind.data.frame(var="Total", t(r2_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_c_corr),
cbind.data.frame(var=c("Male","Female"), matrix(r2_sex_c[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_sex_c, ncol=1)),
cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr_c[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_agegr_c, ncol=1)),
cbind.data.frame(var="Ambulatory", t(r2_amb_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_amb_1k_c_corr),
cbind.data.frame(var="Residential", t(r2_res_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_res_1k_c_corr),
cbind.data.frame(var="Illicit", t(r2_illicit_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_illicit_1k_c_corr),
cbind.data.frame(var="Licit", t(r2_licit_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_licit_1k_c_corr),
cbind.data.frame(var="Completed", t(r2_comp_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_comp_1k_c_corr),
cbind.data.frame(var="Not completed", t(r2_notcomp_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_notcomp_1k_c_corr)
)|>
(\(df) {
df->> df_smr_dir_c
df
})()|>
mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|>
mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
dplyr::select(-any_of(2:7))|>
extract(
SMR_dir,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|>
knitr::kable("markdown", caption= "SMRs, direct method, unfinished treatments")| var | CMR | DSR | DSR (SEs robust to dispersion) |
|---|---|---|---|
| Total | 9.2 (8.9–9.5) | 12.7 (10.7–15.1) | 12.7 (10.9–14.9) |
| Male | 9.7 (9.4–10.1) | 16.6 (12.3–22.5) | 16.6 (12.2–22.7) |
| Female | 7.6 (7.1–8.1) | 9.5 (8.1–11.0) | 9.5 (8.4–10.6) |
| 18-29 | 3.1 (2.7–3.5) | 2.9 (2.5–3.4) | 2.9 (2.5–3.4) |
| 30-44 | 6.3 (6.0–6.7) | 6.2 (5.4–7.1) | 6.2 (5.5–7.0) |
| 45-59 | 17.8 (17.0–18.7) | 16.4 (14.9–18.1) | 16.4 (15.4–17.5) |
| 60+ | 34.9 (31.8–38.2) | 37.5 (24.6–57.2) | 37.5 (26.4–53.3) |
| Ambulatory | 8.9 (8.6–9.2) | 13.6 (10.1–18.3) | 13.6 (10.2–18.2) |
| Residential | 10.7 (10.0–11.6) | 12.2 (10.5–14.2) | 12.2 (10.8–13.9) |
| Illicit | 5.4 (5.1–5.7) | 11.6 (6.3–21.5) | 11.6 (5.4–25.0) |
| Licit | 18.0 (17.2–18.7) | 16.6 (14.9–18.5) | 16.6 (15.1–18.3) |
| Completed | 9.2 (8.7–9.7) | 10.0 (9.0–11.3) | 10.0 (9.1–11.1) |
| Not completed | 9.1 (8.8–9.5) | 16.3 (11.4–23.2) | 16.3 (11.6–22.9) |
Code
# sr_1_sex_fot
# r2_sex_fot
rates_df_fot <- r2_adj_fot %>%
mutate(
# conviene expresar la tasa por 1 000 persona-año
rate_adj_1k = rate.adj * 1e3,
rate_adj_lo_1k = rate.adj.lo * 1e3,
rate_adj_hi_1k = rate.adj.hi * 1e3
)
rates_sex_df_fot <- r2_sex_fot %>%
mutate(
# conviene expresar la tasa por 1 000 persona-año
rate_adj_1k = rate.adj * 1e3,
rate_adj_lo_1k = rate.adj.lo * 1e3,
rate_adj_hi_1k = rate.adj.hi * 1e3
)
p_rate2 <- ggplot(rates_sex_df_fot, aes(x = fot, y = rate_adj_1k, fill=sex)) +
geom_ribbon(aes(ymin = rate_adj_lo_1k, ymax = rate_adj_hi_1k, fill=sex),
alpha = .20) +
geom_line(aes(color=sex), size = .9) +
geom_point(size = 2) +
scale_x_continuous("Years since discharge",
breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
scale_y_continuous("Adjusted rate \n(deaths ×1,000 PY)",
limits = c(0, NA)) +
theme_minimal(base_family = "Times New Roman")+
theme(axis.title.x = element_blank())+scale_colour_manual(
values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
values = c(Male = alpha("#2C3E8B", 0.25),
Female = alpha("#E69F00", 0.25))
)
p_sir2 <- ggplot(sr_1_sex_fot, aes(x = fot, y = sir, fill= sex)) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "grey50") +
geom_ribbon(aes(ymin = sir.lo, ymax = sir.hi, fill=sex),
alpha = .20) +
geom_line(aes(color=sex), size = .9) +
geom_point(size = 2) +
scale_x_continuous("Years since discharge",
breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
theme_minimal(base_family = "Times New Roman")+
theme(axis.title.x = element_blank())+
scale_y_log10(
"Adjusted SMR",
breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25)),
labels = scales::number_format(accuracy = 0.1)
) +scale_colour_manual(
values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
values = c(Male = alpha("#2C3E8B", 0.25),
Female = alpha("#E69F00", 0.25))
)+ theme(legend.position="none")
# scale_y_continuous(trans = "exp", breaks = c(0,.5, 1, 1.5,2),
# name = "Adjusted SMR") # removes the need for readers to exponentiate
legend_shared <- ggpubr::get_legend(
p_rate2 + theme(legend.position = "bottom")+ guides(fill=guide_legend(title="Sex"), color=guide_legend(title="Sex")) # basta un panel
)
panels <- plot_grid(
p_rate2+ theme(legend.position="none"), p_sir2+ theme(legend.position="none"),
ncol = 1,
labels = c("a", "b"),
label_size = 14,
label_fontfamily = "Times New Roman",
align = "v", # alinea verticalmente
axis = "l", # toma eje izquierdo como referencia
label_x = 0, # esquina izq.
label_y = 1,
hjust = -0.1,
vjust = 1.2
)
# Etiqueta global del eje-x
xlab_shared <- ggdraw() +
draw_label("Years since discharge",
fontfamily = "Times New Roman",
fontface = "plain", size = 12, hjust = 0.5)
# Figura final (ajusta rel_heights si necesitas más/menos espacio)
final_plot <- plot_grid(
panels,
xlab_shared,
legend_shared,
ncol = 1,
rel_heights = c(1, 0.06, 0.10) # ajusta espacio a tu gusto
)
# Mostrar o guardar
print(final_plot)
#ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Figure_1_rates_and_SIR_by_fot.png"), dpi = 600, width = 6*.9, height = 7*.9)
figexp<- 1.5
deinflar_word <- 1/1.07653631284916
ggsave(
paste0(gsub("/cons","",getwd()), "/cons/_figs/Figure_1_rates_and_SIR_by_fot.pdf"),
dpi = 600,
width = 80 *figexp, # Target width in mm (directly from journal instructions)
height = 80 * figexp* (7/6), # Adjust height proportionally based on your original plot ratio
units = "mm",
device = cairo_pdf # This is the key to fixing the font error
)
ggsave(
paste0(gsub("/cons","",getwd()), "/cons/_figs/Figure_1_rates_and_SIR_by_fot.png"),
dpi = 600,
width = 80 *figexp*deinflar_word, # Target width in mm (directly from journal instructions)
height = 80 * figexp*deinflar_word* (7/6), # Adjust height proportionally based on your original plot ratio
units = "mm"#,
#device = cairo_pdf # This is the key to fixing the font error
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
c_SISTRAT_c1_c_fot <- lexpand( clean_df_c,
status = status,
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#fot=0:10,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76), fot = c(0, .0386,.2465, .5, 1, 3, 5, 7, 9, Inf)),
aggre = list(agegroup = age, year = per, sex= sex_rec, fot= fot) )
sir_tot_fot_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_fot, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded,
ref.rate = 'haz',
print="fot",
adjust = c('agegroup','year','sex'),
EAR=T)#Excess Absolute Risks
sir_tot_fot
sr_1_sex_fot_c <- popEpi::sir(c_SISTRAT_c1_c_fot, coh.obs = 'from0to1',
coh.pyrs = 'pyrs',
ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
ref.rate = haz,
print = c("sex", "fot"),
adjust = c("agegroup", "sex", "year"),
test.type = "homogeneity",
conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
conf.level = 0.95, EAR = T)
r2_adj_fot_c <- rate(
data = c_SISTRAT_c1_c_fot,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df, #weights inglm should be applied in the offset
print= "fot"
)
r2_sex_fot_c <- rate( data = c_SISTRAT_c1_c_fot,
obs = from0to1,
pyrs = pyrs,
print = c("sex","fot"),
adjust = c("year", "agegroup"),
weights = list(
year=proy_ine_reg_group_25_june_year$w,
agegroup = proy_ine_reg_group_25_june_age$w
#sex = proy_ine_reg_group_25_june_sex$w)
)
)
rates_df_fot_c <- r2_adj_fot_c %>%
mutate(
# conviene expresar la tasa por 1 000 persona-año
rate_adj_1k = rate.adj * 1e3,
rate_adj_lo_1k = rate.adj.lo * 1e3,
rate_adj_hi_1k = rate.adj.hi * 1e3
)
rates_sex_df_fot_c <- r2_sex_fot_c %>%
mutate(
# conviene expresar la tasa por 1 000 persona-año
rate_adj_1k = rate.adj * 1e3,
rate_adj_lo_1k = rate.adj.lo * 1e3,
rate_adj_hi_1k = rate.adj.hi * 1e3
)
p_rate2_c <- ggplot(rates_sex_df_fot_c, aes(x = fot, y = rate_adj_1k, fill=sex)) +
geom_ribbon(aes(ymin = rate_adj_lo_1k, ymax = rate_adj_hi_1k, fill=sex),
alpha = .20) +
geom_line(aes(color=sex), size = .9) +
geom_point(size = 2) +
scale_x_continuous("Years since discharge",
breaks = setdiff(round(rates_df_fot_c$fot,2),c(0.04, 0.25))) +
scale_y_continuous("Adjusted rate \n(deaths ×1,000 PY)",
limits = c(0, NA)) +
theme_minimal()+
theme(axis.title.x = element_blank())+scale_colour_manual(
values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
values = c(Male = alpha("#2C3E8B", 0.25),
Female = alpha("#E69F00", 0.25))
)
p_sir2_c <- ggplot(sr_1_sex_fot_c, aes(x = fot, y = sir, fill= sex)) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "grey50") +
geom_ribbon(aes(ymin = sir.lo, ymax = sir.hi, fill=sex),
alpha = .20) +
geom_line(aes(color=sex), size = .9) +
geom_point(size = 2) +
scale_x_continuous("Years since discharge",
breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
theme_minimal()+
theme(axis.title.x = element_blank())+
scale_y_log10(
"Adjusted SMR",
breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25)),
labels = scales::number_format(accuracy = 0.1)
) +scale_colour_manual(
values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
values = c(Male = alpha("#2C3E8B", 0.25),
Female = alpha("#E69F00", 0.25))
)+ theme(legend.position="none")
# scale_y_continuous(trans = "exp", breaks = c(0,.5, 1, 1.5,2),
# name = "Adjusted SMR") # removes the need for readers to exponentiate
legend_shared_c <- get_legend(
p_rate2_c + theme(legend.position = "bottom") # basta un panel
)
panels_c <- plot_grid(
p_rate2_c+ theme(legend.position="none"), p_sir2_c+ theme(legend.position="none"),
ncol = 1,
align = "v",
axis = "l"
)
# Etiqueta global del eje-x
xlab_shared_c <- ggdraw() +
draw_label("Years since discharge",
fontface = "plain", size = 12, hjust = 0.5)
# Figura final (ajusta rel_heights si necesitas más/menos espacio)
final_plot_c <- plot_grid(
panels_c,
xlab_shared_c,
legend_shared_c,
ncol = 1,
rel_heights = c(1, 0.06, 0.10) # ajusta espacio a tu gusto
)
# Mostrar o guardar
print(final_plot_c)
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/rates_and_SIR_by_fot_c.png"), dpi = 600, width = 7, height = 4.5)SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile)
Test for homogeneity: p < 0.001
Total sir: 3.59 (3.46-3.72)
Total observed: 2996
Total expected: 834.72
Total person-years: 353826
Clave <fot>
fot observed expected pyrs sir sir.lo sir.hi p_value EAR
<num> <num> <num> <num> <num> <num> <num> <num> <num>
1: 0.0000 49 5.66 2700.92 8.66 6.46 11.32 0 16.047
2: 0.0386 135 30.31 14408.71 4.45 3.74 5.25 0 7.266
3: 0.2465 135 36.67 17287.71 3.68 3.09 4.34 0 5.688
4: 0.5000 239 71.96 33364.72 3.32 2.92 3.76 0 5.006
5: 1.0000 1028 264.11 116421.81 3.89 3.66 4.14 0 6.561
6: 3.0000 696 205.71 84817.22 3.38 3.14 3.64 0 5.780
7: 5.0000 452 133.75 52809.05 3.38 3.08 3.70 0 6.026
8: 7.0000 209 69.15 26021.20 3.02 2.63 3.45 0 5.374
9: 9.0000 53 17.40 5994.64 3.05 2.30 3.94 0 5.939
Heterogeneity
Code
sep_ind_a<-
tibble::tibble(type= "Indirect, main", raw = df_smr_ind$SMR) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_ind_b<-
tibble::tibble(type= "Indirect, sens", raw = df_smr_ind_b$SMR) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_dir_a<-
tibble(type= "Indirect, main", raw = df_smr_dir_a$SMR_dir) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_dir_b<-
tibble::tibble(type= "Indirect, sens", raw = df_smr_dir_b$SMR_dir) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_ind_c<-
tibble::tibble(type= "Indirect, main", raw = df_smr_ind_c$SMR) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_dir_c<-
tibble::tibble(type= "Indirect, sens", raw = df_smr_dir_c$SMR_dir) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
cat("SMRs, indirect, main")
cat("Between sex\n")
variances_ind_a_sex <- ((log(sep_ind_a$upper[2:3]) - log(sep_ind_a$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_a_fe_sex <- rma(yi = log(sep_ind_a$estimate[2:3]), sei = sqrt(variances_ind_a_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_a_fe_sex
cat("Between ages\n")
variances_ind_a_agegroup <- ((log(sep_ind_a$upper[4:7]) - log(sep_ind_a$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_a_fe_agegroup <- rma(yi = log(sep_ind_a$estimate[4:7]), sei = sqrt(variances_ind_a_agegroup), method = "FE")
meta_fe_ind_a_fe_agegroup
cat("Between settings\n")
variances_ind_a_setting <- ((log(sep_ind_a$upper[8:9]) - log(sep_ind_a$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_a_fe_setting <- rma(yi = log(sep_ind_a$estimate[8:9]), sei = sqrt(variances_ind_a_setting), method = "FE")
meta_fe_ind_a_fe_setting
cat("Between primary substance\n")
variances_ind_a_licit <- ((log(sep_ind_a$upper[10:11]) - log(sep_ind_a$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_a_fe_licit <- rma(yi = log(sep_ind_a$estimate[10:11]), sei = sqrt(variances_ind_a_licit), method = "FE")
meta_fe_ind_a_fe_licit
cat("Between completed and non-completed treatments\n")
variances_ind_a_comp <- ((log(sep_ind_a$upper[12:13]) - log(sep_ind_a$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_a_fe_comp <- rma(yi = log(sep_ind_a$estimate[12:13]), sei = sqrt(variances_ind_a_comp), method = "FE")
meta_fe_ind_a_fe_comp
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("SMRs, indirect, main")
cat("Between sex\n")
variances_dir_a_sex <- ((log(sep_dir_a$upper[2:3]) - log(sep_dir_a$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_a_fe_sex <- rma(yi = log(sep_dir_a$estimate[2:3]), sei = sqrt(variances_dir_a_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_a_fe_sex
cat("Between ages\n")
variances_dir_a_agegroup <- ((log(sep_dir_a$upper[4:7]) - log(sep_dir_a$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_a_fe_agegroup <- rma(yi = log(sep_dir_a$estimate[4:7]), sei = sqrt(variances_dir_a_agegroup), method = "FE")
meta_fe_dir_a_fe_agegroup
cat("Between settings\n")
variances_dir_a_setting <- ((log(sep_dir_a$upper[8:9]) - log(sep_dir_a$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_a_fe_setting <- rma(yi = log(sep_dir_a$estimate[8:9]), sei = sqrt(variances_dir_a_setting), method = "FE")
meta_fe_dir_a_fe_setting
cat("Between primary substance\n")
variances_dir_a_licit <- ((log(sep_dir_a$upper[10:11]) - log(sep_dir_a$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_a_fe_licit <- rma(yi = log(sep_dir_a$estimate[10:11]), sei = sqrt(variances_dir_a_licit), method = "FE")
meta_fe_dir_a_fe_licit
cat("Between completed and non-completed treatments\n")
variances_dir_a_comp <- ((log(sep_dir_a$upper[12:13]) - log(sep_dir_a$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_a_fe_comp <- rma(yi = log(sep_dir_a$estimate[12:13]), sei = sqrt(variances_dir_a_comp), method = "FE")
meta_fe_dir_a_fe_comp
bind_rows(
cbind.data.frame(type= "Main", comp= "Sex", Q= meta_fe_ind_a_fe_sex$QE, p= meta_fe_ind_a_fe_sex$QEp, Q_b= meta_fe_dir_a_fe_sex$QE, p_b= meta_fe_dir_a_fe_sex$QEp),
cbind.data.frame(type= "Main", comp= "Age groups", Q= meta_fe_ind_a_fe_agegroup$QE, p= meta_fe_ind_a_fe_agegroup$QEp, Q_b= meta_fe_dir_a_fe_agegroup$QE, p_b= meta_fe_dir_a_fe_agegroup$QEp),
cbind.data.frame(type= "Main", comp= "Setting", Q= meta_fe_ind_a_fe_setting$QE, p= meta_fe_ind_a_fe_setting$QEp, Q_b= meta_fe_dir_a_fe_setting$QE, p_b= meta_fe_dir_a_fe_setting$QEp),
cbind.data.frame(type= "Main", comp= "Primary substance", Q= meta_fe_ind_a_fe_licit$QE, p= meta_fe_ind_a_fe_licit$QEp, Q_b= meta_fe_dir_a_fe_licit$QE, p_b= meta_fe_dir_a_fe_licit$QEp),
cbind.data.frame(type= "Main", comp= "Tr. compliance status", Q= meta_fe_ind_a_fe_comp$QE, p= meta_fe_ind_a_fe_comp$QEp, Q_b= meta_fe_dir_a_fe_comp$QE, p_b= meta_fe_dir_a_fe_comp$QEp)
)|>
mutate(
Qa_SMR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
),
Qa_DSR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
)
) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR) |>
knitr::kable("markdown", caption= "Heterogeneity, main")SMRs, indirect, mainBetween sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.85%
H^2 (total variability / sampling variability): 86.82
Test for Heterogeneity:
Q(df = 1) = 86.8157, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3116 0.0228 57.6099 <.0001 1.2670 1.3563 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 63.85%
H^2 (total variability / sampling variability): 2.77
Test for Heterogeneity:
Q(df = 3) = 8.2993, p-val = 0.0402
Model Results:
estimate se zval pval ci.lb ci.ub
1.2487 0.0291 42.9598 <.0001 1.1917 1.3056 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.03%
H^2 (total variability / sampling variability): 50.74
Test for Heterogeneity:
Q(df = 1) = 50.7425, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3022 0.0193 67.5948 <.0001 1.2645 1.3400 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 99.38%
H^2 (total variability / sampling variability): 161.43
Test for Heterogeneity:
Q(df = 1) = 161.4288, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3842 0.0206 67.1615 <.0001 1.3438 1.4246 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.38%
H^2 (total variability / sampling variability): 61.83
Test for Heterogeneity:
Q(df = 1) = 61.8300, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.2704 0.0191 66.4657 <.0001 1.2329 1.3078 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SMRs, indirect, mainBetween sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 63.81%
H^2 (total variability / sampling variability): 2.76
Test for Heterogeneity:
Q(df = 1) = 2.7632, p-val = 0.0965
Model Results:
estimate se zval pval ci.lb ci.ub
2.2314 0.0801 27.8706 <.0001 2.0745 2.3884 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 99.31%
H^2 (total variability / sampling variability): 144.23
Test for Heterogeneity:
Q(df = 3) = 432.6794, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
2.4121 0.0263 91.8591 <.0001 2.3606 2.4635 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 23.71%
H^2 (total variability / sampling variability): 1.31
Test for Heterogeneity:
Q(df = 1) = 1.3108, p-val = 0.2522
Model Results:
estimate se zval pval ci.lb ci.ub
2.3603 0.0617 38.2425 <.0001 2.2393 2.4813 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 0.00%
H^2 (total variability / sampling variability): 0.16
Test for Heterogeneity:
Q(df = 1) = 0.1567, p-val = 0.6922
Model Results:
estimate se zval pval ci.lb ci.ub
2.6820 0.0650 41.2571 <.0001 2.5546 2.8094 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 88.61%
H^2 (total variability / sampling variability): 8.78
Test for Heterogeneity:
Q(df = 1) = 8.7810, p-val = 0.0030
Model Results:
estimate se zval pval ci.lb ci.ub
2.1996 0.0847 25.9775 <.0001 2.0336 2.3655 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| type | comp | Qa_SMR | Qa_DSR |
|---|---|---|---|
| Main | Sex | Q 86.82 (df=1), p=0.000 | Q 2.76 (df=1), p=0.096 |
| Main | Age groups | Q 8.30 (df=3), p=0.040 | Q 432.68 (df=3), p=0.000 |
| Main | Setting | Q 50.74 (df=1), p=0.000 | Q 1.31 (df=1), p=0.252 |
| Main | Primary substance | Q 161.43 (df=1), p=0.000 | Q 0.16 (df=1), p=0.692 |
| Main | Tr. compliance status | Q 61.83 (df=1), p=0.000 | Q 8.78 (df=1), p=0.003 |
Code
rbind.data.frame(
tibble(
group1 = "SMR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_ind_a$estimate[4:7], lowers= sep_ind_a$lower[4:7], uppers= sep_ind_a$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
tibble(
group1 = "DSR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_dir_a$estimate[4:7], lowers= sep_dir_a$lower[4:7], uppers= sep_dir_a$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
dplyr::select(-significance) |>
rename("estimate1"="smr1", "estimate2"="smr2")|>
mutate(across(c("se_diff", "z"),~round(.,2))) |>
knitr::kable("markdown", caption="Pairwise comparison, age groups")| group1 | group2 | estimate1 | estimate2 | difference | se_diff | z | p_unadj | p_holm |
|---|---|---|---|---|---|---|---|---|
| SMR | ||||||||
| 18-29 | 30-44 | 3.438224 | 3.876232 | -0.438008 | 0.50 | 0.88 | 0.3786415 | 1.0000000 |
| 18-29 | 45-59 | 3.438224 | 3.588361 | -0.150137 | 0.43 | 0.35 | 0.7280816 | 1.0000000 |
| 18-29 | 60+ | 3.438224 | 3.010031 | 0.428193 | 0.45 | 0.95 | 0.3407015 | 1.0000000 |
| 30-44 | 45-59 | 3.876232 | 3.588361 | 0.287871 | 0.32 | 0.91 | 0.3625232 | 1.0000000 |
| 30-44 | 60+ | 3.876232 | 3.010031 | 0.866201 | 0.34 | 2.55 | 0.0107925 | 0.0647547 |
| 45-59 | 60+ | 3.588361 | 3.010031 | 0.578330 | 0.23 | 2.48 | 0.0131587 | 0.0657933 |
| DSR | ||||||||
| 18-29 | 30-44 | 2.883111 | 5.683244 | -2.800133 | 0.60 | 4.63 | 0.0000037 | 0.0000146 |
| 18-29 | 45-59 | 2.883111 | 14.226041 | -11.342930 | 0.47 | 24.02 | 0.0000000 | 0.0000000 |
| 18-29 | 60+ | 2.883111 | 46.704337 | -43.821226 | 20.84 | 2.10 | 0.0354917 | 0.1064750 |
| 30-44 | 45-59 | 5.683244 | 14.226041 | -8.542797 | 0.70 | 12.23 | 0.0000000 | 0.0000000 |
| 30-44 | 60+ | 5.683244 | 46.704337 | -41.021093 | 20.85 | 1.97 | 0.0490975 | 0.1064750 |
| 45-59 | 60+ | 14.226041 | 46.704337 | -32.478296 | 20.84 | 1.56 | 0.1191836 | 0.1191836 |
Code
cat("SMRs, indirect, sens")
cat("Between sex\n")
variances_ind_b_sex <- ((log(sep_ind_b$upper[2:3]) - log(sep_ind_b$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_b_fe_sex <- rma(yi = log(sep_ind_b$estimate[2:3]), sei = sqrt(variances_ind_b_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_b_fe_sex
cat("Between ages\n")
variances_ind_b_agegroup <- ((log(sep_ind_b$upper[4:7]) - log(sep_ind_b$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_b_fe_agegroup <- rma(yi = log(sep_ind_b$estimate[4:7]), sei = sqrt(variances_ind_b_agegroup), method = "FE")
meta_fe_ind_b_fe_agegroup
cat("Between settings\n")
variances_ind_b_setting <- ((log(sep_ind_b$upper[8:9]) - log(sep_ind_b$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_b_fe_setting <- rma(yi = log(sep_ind_b$estimate[8:9]), sei = sqrt(variances_ind_b_setting), method = "FE")
meta_fe_ind_b_fe_setting
cat("Between primary substance\n")
variances_ind_b_licit <- ((log(sep_ind_b$upper[10:11]) - log(sep_ind_b$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_b_fe_licit <- rma(yi = log(sep_ind_b$estimate[10:11]), sei = sqrt(variances_ind_b_licit), method = "FE")
meta_fe_ind_b_fe_licit
cat("Between completed and non-completed treatments\n")
variances_ind_b_comp <- ((log(sep_ind_b$upper[12:13]) - log(sep_ind_b$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_b_fe_comp <- rma(yi = log(sep_ind_b$estimate[12:13]), sei = sqrt(variances_ind_b_comp), method = "FE")
meta_fe_ind_b_fe_comp
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("SMRs, indirect, sens")
cat("Between sex\n")
variances_dir_b_sex <- ((log(sep_dir_b$upper[2:3]) - log(sep_dir_b$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_b_fe_sex <- rma(yi = log(sep_dir_b$estimate[2:3]), sei = sqrt(variances_dir_b_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_b_fe_sex
cat("Between ages\n")
variances_dir_b_agegroup <- ((log(sep_dir_b$upper[4:7]) - log(sep_dir_b$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_b_fe_agegroup <- rma(yi = log(sep_dir_b$estimate[4:7]), sei = sqrt(variances_dir_b_agegroup), method = "FE")
meta_fe_dir_b_fe_agegroup
cat("Between settings\n")
variances_dir_b_setting <- ((log(sep_dir_b$upper[8:9]) - log(sep_dir_b$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_b_fe_setting <- rma(yi = log(sep_dir_b$estimate[8:9]), sei = sqrt(variances_dir_b_setting), method = "FE")
meta_fe_dir_b_fe_setting
cat("Between primary substance\n")
variances_dir_b_licit <- ((log(sep_dir_b$upper[10:11]) - log(sep_dir_b$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_b_fe_licit <- rma(yi = log(sep_dir_b$estimate[10:11]), sei = sqrt(variances_dir_b_licit), method = "FE")
meta_fe_dir_b_fe_licit
cat("Between completed and non-completed treatments\n")
variances_dir_b_comp <- ((log(sep_dir_b$upper[12:13]) - log(sep_dir_b$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_b_fe_comp <- rma(yi = log(sep_dir_b$estimate[12:13]), sei = sqrt(variances_dir_b_comp), method = "FE")
meta_fe_dir_b_fe_comp
bind_rows(
cbind.data.frame(type= "Sensitivity", comp= "Sex", Q= meta_fe_ind_b_fe_sex$QE, p= meta_fe_ind_b_fe_sex$QEp, Q_b= meta_fe_dir_b_fe_sex$QE, p_b= meta_fe_dir_b_fe_sex$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Age groups", Q= meta_fe_ind_b_fe_agegroup$QE, p= meta_fe_ind_b_fe_agegroup$QEp, Q_b= meta_fe_dir_b_fe_agegroup$QE, p_b= meta_fe_dir_b_fe_agegroup$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Setting", Q= meta_fe_ind_b_fe_setting$QE, p= meta_fe_ind_b_fe_setting$QEp, Q_b= meta_fe_dir_b_fe_setting$QE, p_b= meta_fe_dir_b_fe_setting$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Primary substance", Q= meta_fe_ind_b_fe_licit$QE, p= meta_fe_ind_b_fe_licit$QEp, Q_b= meta_fe_dir_b_fe_licit$QE, p_b= meta_fe_dir_b_fe_licit$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Tr. compliance status", Q= meta_fe_ind_b_fe_comp$QE, p= meta_fe_ind_b_fe_comp$QEp, Q_b= meta_fe_dir_b_fe_comp$QE, p_b= meta_fe_dir_b_fe_comp$QEp)
) |>
mutate(
Qa_SMR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
),
Qa_DSR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
)
) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR)|>
knitr::kable("markdown", caption= "Heterogeneity, sensitivity")SMRs, indirect, sensBetween sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.47%
H^2 (total variability / sampling variability): 65.16
Test for Heterogeneity:
Q(df = 1) = 65.1585, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.4313 0.0278 51.4241 <.0001 1.3767 1.4858 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 78.17%
H^2 (total variability / sampling variability): 4.58
Test for Heterogeneity:
Q(df = 3) = 13.7443, p-val = 0.0033
Model Results:
estimate se zval pval ci.lb ci.ub
1.3271 0.0318 41.7670 <.0001 1.2648 1.3894 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.82%
H^2 (total variability / sampling variability): 85.06
Test for Heterogeneity:
Q(df = 1) = 85.0625, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.4163 0.0188 75.3034 <.0001 1.3795 1.4532 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 99.39%
H^2 (total variability / sampling variability): 164.66
Test for Heterogeneity:
Q(df = 1) = 164.6616, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.4606 0.0214 68.1468 <.0001 1.4186 1.5026 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.92%
H^2 (total variability / sampling variability): 92.71
Test for Heterogeneity:
Q(df = 1) = 92.7138, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3392 0.0185 72.3847 <.0001 1.3029 1.3754 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SMRs, indirect, sensBetween sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 63.59%
H^2 (total variability / sampling variability): 2.75
Test for Heterogeneity:
Q(df = 1) = 2.7462, p-val = 0.0975
Model Results:
estimate se zval pval ci.lb ci.ub
2.3506 0.0768 30.6158 <.0001 2.2001 2.5010 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 98.98%
H^2 (total variability / sampling variability): 97.99
Test for Heterogeneity:
Q(df = 3) = 293.9785, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
2.5801 0.0298 86.5100 <.0001 2.5216 2.6385 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 0.00%
H^2 (total variability / sampling variability): 0.58
Test for Heterogeneity:
Q(df = 1) = 0.5794, p-val = 0.4465
Model Results:
estimate se zval pval ci.lb ci.ub
2.5550 0.0639 39.9919 <.0001 2.4298 2.6803 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 0.00%
H^2 (total variability / sampling variability): 0.05
Test for Heterogeneity:
Q(df = 1) = 0.0479, p-val = 0.8267
Model Results:
estimate se zval pval ci.lb ci.ub
2.8375 0.0782 36.2628 <.0001 2.6842 2.9909 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 87.48%
H^2 (total variability / sampling variability): 7.99
Test for Heterogeneity:
Q(df = 1) = 7.9866, p-val = 0.0047
Model Results:
estimate se zval pval ci.lb ci.ub
2.3194 0.0854 27.1439 <.0001 2.1519 2.4868 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| type | comp | Qa_SMR | Qa_DSR |
|---|---|---|---|
| Sensitivity | Sex | Q 65.16 (df=1), p=0.000 | Q 2.75 (df=1), p=0.097 |
| Sensitivity | Age groups | Q 13.74 (df=3), p=0.003 | Q 293.98 (df=3), p=0.000 |
| Sensitivity | Setting | Q 85.06 (df=1), p=0.000 | Q 0.58 (df=1), p=0.447 |
| Sensitivity | Primary substance | Q 164.66 (df=1), p=0.000 | Q 0.05 (df=1), p=0.827 |
| Sensitivity | Tr. compliance status | Q 92.71 (df=1), p=0.000 | Q 7.99 (df=1), p=0.005 |
Code
rbind.data.frame(
tibble(
group1 = "SMR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_ind_b$estimate[4:7], lowers= sep_ind_b$lower[4:7], uppers= sep_ind_b$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
tibble(
group1 = "DSR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_dir_b$estimate[4:7], lowers= sep_dir_b$lower[4:7], uppers= sep_dir_b$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
dplyr::select(-significance) |>
rename("estimate1"="smr1", "estimate2"="smr2")|>
mutate(across(c("se_diff", "z"),~round(.,2))) |>
knitr::kable("markdown", caption="Pairwise comparison, age groups, sensitivity")| group1 | group2 | estimate1 | estimate2 | difference | se_diff | z | p_unadj | p_holm |
|---|---|---|---|---|---|---|---|---|
| SMR | ||||||||
| 18-29 | 30-44 | 3.897919 | 4.421679 | -0.523760 | 0.61 | 0.86 | 0.3884133 | 0.7768265 |
| 18-29 | 45-59 | 3.897919 | 3.904502 | -0.006583 | 0.53 | 0.01 | 0.9901223 | 0.9901223 |
| 18-29 | 60+ | 3.897919 | 3.128950 | 0.768969 | 0.54 | 1.42 | 0.1545615 | 0.6182458 |
| 30-44 | 45-59 | 4.421679 | 3.904502 | 0.517177 | 0.38 | 1.35 | 0.1761901 | 0.6182458 |
| 30-44 | 60+ | 4.421679 | 3.128950 | 1.292729 | 0.39 | 3.28 | 0.0010339 | 0.0062031 |
| 45-59 | 60+ | 3.904502 | 3.128950 | 0.775552 | 0.26 | 2.95 | 0.0031980 | 0.0159899 |
| DSR | ||||||||
| 18-29 | 30-44 | 3.634236 | 7.255798 | -3.621562 | 0.96 | 3.76 | 0.0001707 | 0.0006827 |
| 18-29 | 45-59 | 3.634236 | 16.356597 | -12.722361 | 0.63 | 20.30 | 0.0000000 | 0.0000000 |
| 18-29 | 60+ | 3.634236 | 53.325194 | -49.690958 | 23.88 | 2.08 | 0.0374068 | 0.1122205 |
| 30-44 | 45-59 | 7.255798 | 16.356597 | -9.100799 | 1.06 | 8.62 | 0.0000000 | 0.0000000 |
| 30-44 | 60+ | 7.255798 | 53.325194 | -46.069396 | 23.89 | 1.93 | 0.0538073 | 0.1122205 |
| 45-59 | 60+ | 16.356597 | 53.325194 | -36.968597 | 23.88 | 1.55 | 0.1215820 | 0.1215820 |
Code
cat("SMRs, indirect, sensitivy (2)")
cat("Between sex\n")
variances_ind_c_sex <- ((log(sep_ind_c$upper[2:3]) - log(sep_ind_c$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_c_fe_sex <- rma(yi = log(sep_ind_c$estimate[2:3]), sei = sqrt(variances_ind_c_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_c_fe_sex
cat("Between ages\n")
variances_ind_c_agegroup <- ((log(sep_ind_c$upper[4:7]) - log(sep_ind_c$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_c_fe_agegroup <- rma(yi = log(sep_ind_c$estimate[4:7]), sei = sqrt(variances_ind_c_agegroup), method = "FE")
meta_fe_ind_c_fe_agegroup
cat("Between settings\n")
variances_ind_c_setting <- ((log(sep_ind_c$upper[8:9]) - log(sep_ind_c$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_c_fe_setting <- rma(yi = log(sep_ind_c$estimate[8:9]), sei = sqrt(variances_ind_c_setting), method = "FE")
meta_fe_ind_c_fe_setting
cat("Between primary substance\n")
variances_ind_c_licit <- ((log(sep_ind_c$upper[10:11]) - log(sep_ind_c$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_c_fe_licit <- rma(yi = log(sep_ind_c$estimate[10:11]), sei = sqrt(variances_ind_c_licit), method = "FE")
meta_fe_ind_c_fe_licit
cat("Between completed and non-completed treatments\n")
variances_ind_c_comp <- ((log(sep_ind_c$upper[12:13]) - log(sep_ind_c$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_c_fe_comp <- rma(yi = log(sep_ind_c$estimate[12:13]), sei = sqrt(variances_ind_c_comp), method = "FE")
meta_fe_ind_c_fe_comp
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("SMRs, indirect, sens")
cat("Between sex\n")
variances_dir_c_sex <- ((log(sep_dir_c$upper[2:3]) - log(sep_dir_c$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_c_fe_sex <- rma(yi = log(sep_dir_c$estimate[2:3]), sei = sqrt(variances_dir_c_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_c_fe_sex
cat("Between ages\n")
variances_dir_c_agegroup <- ((log(sep_dir_c$upper[4:7]) - log(sep_dir_c$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_c_fe_agegroup <- rma(yi = log(sep_dir_c$estimate[4:7]), sei = sqrt(variances_dir_c_agegroup), method = "FE")
meta_fe_dir_c_fe_agegroup
cat("Between settings\n")
variances_dir_c_setting <- ((log(sep_dir_c$upper[8:9]) - log(sep_dir_c$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_c_fe_setting <- rma(yi = log(sep_dir_c$estimate[8:9]), sei = sqrt(variances_dir_c_setting), method = "FE")
meta_fe_dir_c_fe_setting
cat("Between primary substance\n")
variances_dir_c_licit <- ((log(sep_dir_c$upper[10:11]) - log(sep_dir_c$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_c_fe_licit <- rma(yi = log(sep_dir_c$estimate[10:11]), sei = sqrt(variances_dir_c_licit), method = "FE")
meta_fe_dir_c_fe_licit
cat("Between completed and non-completed treatments\n")
variances_dir_c_comp <- ((log(sep_dir_c$upper[12:13]) - log(sep_dir_c$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_c_fe_comp <- rma(yi = log(sep_dir_c$estimate[12:13]), sei = sqrt(variances_dir_c_comp), method = "FE")
meta_fe_dir_c_fe_comp
bind_rows(
cbind.data.frame(type= "Sensitivity", comp= "Sex", Q= meta_fe_ind_c_fe_sex$QE, p= meta_fe_ind_c_fe_sex$QEp, Q_b= meta_fe_dir_c_fe_sex$QE, p_b= meta_fe_dir_c_fe_sex$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Age groups", Q= meta_fe_ind_c_fe_agegroup$QE, p= meta_fe_ind_c_fe_agegroup$QEp, Q_b= meta_fe_dir_c_fe_agegroup$QE, p_b= meta_fe_dir_c_fe_agegroup$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Setting", Q= meta_fe_ind_c_fe_setting$QE, p= meta_fe_ind_c_fe_setting$QEp, Q_b= meta_fe_dir_c_fe_setting$QE, p_b= meta_fe_dir_c_fe_setting$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Primary substance", Q= meta_fe_ind_c_fe_licit$QE, p= meta_fe_ind_c_fe_licit$QEp, Q_b= meta_fe_dir_c_fe_licit$QE, p_b= meta_fe_dir_c_fe_licit$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Tr. compliance status", Q= meta_fe_ind_c_fe_comp$QE, p= meta_fe_ind_c_fe_comp$QEp, Q_b= meta_fe_dir_c_fe_comp$QE, p_b= meta_fe_dir_c_fe_comp$QEp)
) |>
mutate(
Qa_SMR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
),
Qa_DSR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
)
) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR)|>
knitr::kable("markdown", caption= "Heterogeneity, sensitivity (2)")SMRs, indirect, sensitivy (2)Between sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 99.04%
H^2 (total variability / sampling variability): 104.38
Test for Heterogeneity:
Q(df = 1) = 104.3827, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3917 0.0196 71.1303 <.0001 1.3533 1.4300 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 70.61%
H^2 (total variability / sampling variability): 3.40
Test for Heterogeneity:
Q(df = 3) = 10.2080, p-val = 0.0169
Model Results:
estimate se zval pval ci.lb ci.ub
1.3232 0.0276 47.9016 <.0001 1.2690 1.3773 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 98.24%
H^2 (total variability / sampling variability): 56.78
Test for Heterogeneity:
Q(df = 1) = 56.7822, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3843 0.0164 84.4820 <.0001 1.3522 1.4164 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 99.61%
H^2 (total variability / sampling variability): 259.18
Test for Heterogeneity:
Q(df = 1) = 259.1790, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.4912 0.0175 85.3448 <.0001 1.4569 1.5254 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 95.71%
H^2 (total variability / sampling variability): 23.32
Test for Heterogeneity:
Q(df = 1) = 23.3207, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
1.3591 0.0157 86.8143 <.0001 1.3284 1.3897 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SMRs, indirect, sensBetween sex
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 90.96%
H^2 (total variability / sampling variability): 11.06
Test for Heterogeneity:
Q(df = 1) = 11.0628, p-val = 0.0009
Model Results:
estimate se zval pval ci.lb ci.ub
2.3149 0.0550 42.0836 <.0001 2.2071 2.4227 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between ages
Fixed-Effects Model (k = 4)
I^2 (total heterogeneity / total variability): 99.47%
H^2 (total variability / sampling variability): 188.41
Test for Heterogeneity:
Q(df = 3) = 565.2305, p-val < .0001
Model Results:
estimate se zval pval ci.lb ci.ub
2.4198 0.0273 88.7556 <.0001 2.3664 2.4733 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between settings
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 0.00%
H^2 (total variability / sampling variability): 0.46
Test for Heterogeneity:
Q(df = 1) = 0.4556, p-val = 0.4997
Model Results:
estimate se zval pval ci.lb ci.ub
2.5200 0.0596 42.3143 <.0001 2.4032 2.6367 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between primary substance
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 0.00%
H^2 (total variability / sampling variability): 0.82
Test for Heterogeneity:
Q(df = 1) = 0.8246, p-val = 0.3638
Model Results:
estimate se zval pval ci.lb ci.ub
2.8037 0.0497 56.4004 <.0001 2.7063 2.9012 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Between completed and non-completed treatments
Fixed-Effects Model (k = 2)
I^2 (total heterogeneity / total variability): 86.16%
H^2 (total variability / sampling variability): 7.23
Test for Heterogeneity:
Q(df = 1) = 7.2264, p-val = 0.0072
Model Results:
estimate se zval pval ci.lb ci.ub
2.3429 0.0472 49.6303 <.0001 2.2504 2.4354 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| type | comp | Qa_SMR | Qa_DSR |
|---|---|---|---|
| Sensitivity | Sex | Q 104.38 (df=1), p=0.000 | Q 11.06 (df=1), p=0.001 |
| Sensitivity | Age groups | Q 10.21 (df=3), p=0.017 | Q 565.23 (df=3), p=0.000 |
| Sensitivity | Setting | Q 56.78 (df=1), p=0.000 | Q 0.46 (df=1), p=0.500 |
| Sensitivity | Primary substance | Q 259.18 (df=1), p=0.000 | Q 0.82 (df=1), p=0.364 |
| Sensitivity | Tr. compliance status | Q 23.32 (df=1), p=0.000 | Q 7.23 (df=1), p=0.007 |
Code
rbind.data.frame(
tibble(
group1 = "SMR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_ind_c$estimate[4:7], lowers= sep_ind_c$lower[4:7], uppers= sep_ind_c$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
tibble(
group1 = "DSR",
group2 = NA_character_,
smr1 = NA_real_,
smr2 = NA_real_,
difference = NA_real_,
se_diff = NA_real_,
z = NA_real_,
p_unadj = NA_real_,
p_holm = NA_real_,
significance= NA_character_
),
pairwise_smr_test(smrs= sep_dir_c$estimate[4:7], lowers= sep_dir_c$lower[4:7], uppers= sep_dir_c$upper[4:7], a=.1)|>
mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
dplyr::select(-significance) |>
rename("estimate1"="smr1", "estimate2"="smr2")|>
mutate(across(c("se_diff", "z"),~round(.,2))) |>
knitr::kable("markdown", caption="Pairwise comparison, age groups, sensitivity")| group1 | group2 | estimate1 | estimate2 | difference | se_diff | z | p_unadj | p_holm |
|---|---|---|---|---|---|---|---|---|
| SMR | ||||||||
| 18-29 | 30-44 | 3.671246 | 4.231312 | -0.560066 | 0.47 | 1.19 | 0.2359458 | 0.9437833 |
| 18-29 | 45-59 | 3.671246 | 3.942439 | -0.271193 | 0.42 | 0.65 | 0.5165100 | 1.0000000 |
| 18-29 | 60+ | 3.671246 | 3.361856 | 0.309390 | 0.41 | 0.75 | 0.4527175 | 1.0000000 |
| 30-44 | 45-59 | 4.231312 | 3.942439 | 0.288873 | 0.33 | 0.88 | 0.3789381 | 1.0000000 |
| 30-44 | 60+ | 4.231312 | 3.361856 | 0.869456 | 0.32 | 2.71 | 0.0066953 | 0.0401718 |
| 45-59 | 60+ | 3.942439 | 3.361856 | 0.580583 | 0.23 | 2.49 | 0.0126800 | 0.0634000 |
| DSR | ||||||||
| 18-29 | 30-44 | 2.942963 | 6.180656 | -3.237693 | 0.45 | 7.17 | 0.0000000 | 0.0000000 |
| 18-29 | 45-59 | 2.942963 | 16.423330 | -13.480367 | 0.60 | 22.64 | 0.0000000 | 0.0000000 |
| 18-29 | 60+ | 2.942963 | 37.547356 | -34.604393 | 6.86 | 5.04 | 0.0000005 | 0.0000014 |
| 30-44 | 45-59 | 6.180656 | 16.423330 | -10.242674 | 0.67 | 15.20 | 0.0000000 | 0.0000000 |
| 30-44 | 60+ | 6.180656 | 37.547356 | -31.366700 | 6.87 | 4.57 | 0.0000050 | 0.0000099 |
| 45-59 | 60+ | 16.423330 | 37.547356 | -21.124026 | 6.88 | 3.07 | 0.0021399 | 0.0021399 |
Kaplan-Meier
Code
clean_df_corr_surv_km<-
clean_df_corr_surv|>
mutate(agegroup = cut(
disch_age_rec, # la variable de edad
breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
right = FALSE, # intervalo izquierdo cerrado [15–30)
labels = c("18-29", "30-44", "45-59", "60+"),
include.lowest = TRUE # 15 entra en el primer tramo
),
#year= lubridate::year(as.Date(exit_date))
year = lubridate::year(as.Date(disch_date_num_rec6)) # USE DISCHARGE YEAR
)|>
filter(disch_age_rec>17, disch_age_rec<76)
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
km_fit <- survfit(Surv(pyrs, status==1) ~ 1, data = clean_df_corr_surv_km)
m_data_m <- data.frame(
time = km_fit$time,
surv = km_fit$surv,
upper = km_fit$upper,
lower = km_fit$lower
#strata = rep(c("6623, Primer mes, TSM y Comorbilidad(2)","6612, Primer mes TUS(1)"), km_fit$strata)
)
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
km_fit_age <- survfit(Surv(pyrs, status==1) ~ strata(agegroup), data = clean_df_corr_surv_km)
m_data_age <- data.frame(
time = km_fit_age$time,
surv = km_fit_age$surv,
upper = km_fit_age$upper,
lower = km_fit_age$lower,
strata = rep(c("18-29","30-44", "45-59", "60+"), km_fit_age$strata)
)
age_km<-
ggplot(m_data_age, aes(x = time, y = surv, color = strata)) +
geom_step(size = 1.2) + # Curvas de supervivencia
geom_ribbon(aes(ymin = lower, ymax = upper, fill = strata), alpha = 0.2, color = NA) + # Intervalos de confianza
labs(
x = "Time (years)",
y = "Survival probability",
color = "Strata",
fill = "Strata"
) +
theme_minimal() +
theme(legend.position = "bottom")+
scale_color_manual(values = gray.colors(4, start = 0, end = 0.8)) +
scale_fill_manual(values = gray.colors(4, start = 0, end = 0.8)) +
ylim(c(0.5,1))
# scale_color_manual(values = c("#E2725B", "#D2B48C")) + # Colores para las curvas
# scale_fill_manual(values = c("#E2725B", "#D2B48C")) # Colores para las áreas sombreadas
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
km_fit_sex <- survfit(Surv(pyrs, status==1) ~ strata(sex_rec), data = clean_df_corr_surv_km)
m_data_sex <- data.frame(
time = km_fit_sex$time,
surv = km_fit_sex$surv,
upper = km_fit_sex$upper,
lower = km_fit_sex$lower,
strata = rep(c("Male", "Female"), km_fit_sex$strata)
)
sex_km<-
ggplot(m_data_sex, aes(x = time, y = surv, color = strata)) +
geom_step(size = 1.2) + # Curvas de supervivencia
geom_ribbon(aes(ymin = lower, ymax = upper, fill = strata), alpha = 0.2, color = NA) + # Intervalos de confianza
labs(
x = "Time (years)",
y = "Survival probability",
color = "Strata",
fill = "Strata"
) +
theme_minimal() +
theme(legend.position = "bottom")+
scale_color_manual(values = gray.colors(2, start = 0, end = 0.8)) +
scale_fill_manual(values = gray.colors(2, start = 0, end = 0.8)) +
ylim(c(0.5,1))
plot_grid(age_km,
sex_km+ labs(x=NULL)+ theme(axis.text.y = element_blank()), ncol = 2)
ggsave(paste0(getwd(),"/_figs/kaplanmeier_age_sex.png"), dpi = 600, width = 9)Net survival
Code
#This article shows that if the follow-up of the cohort is less than 10 years,
#any of these methods should give similar results. However, the Hakulinen method is preferred,
#since it accounts for heterogeneity due to potential withdrawals.
#https://scielo.isciii.es/scielo.php?script=sci_arttext&pid=S0213-91112006000400012
cat("Discharge age and calendar year\n")
clean_df_corr_surv_km$disch_age <- with(clean_df_corr_surv_km, time_length(interval(birth_date_rec, disch_date_rec6), unit="year"))
clean_df_corr_surv_km$disch_yr<- as.integer(clean_df_corr_surv_km$yr_fr_disch_date)
clean_df_corr_surv_km[, c("follow_up_days","status", "disch_age", "disch_yr", "sex_rec", "agegroup", "res_plan", "tr_compliance_status", "prim_sub_licit")]|> rio::export(paste0(gsub("/cons","",getwd()), "/clean_df_corr_surv_km.csv"))
modSR5_SC <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ 1, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
surv_data5_sc <- data.frame(
time = modSR5_SC$time,
surv = modSR5_SC$surv,
lower = modSR5_SC$lower,
upper = modSR5_SC$upper
# Hay que repetir el nombre de estrato la cantidad de filas que le corresponden
#strata = rep(names(modSR5_SC$strata), modSR5_SC$strata)
)
sn_5_total<- summary(modSR5_SC, times = c(5)*365.241)
sn_10_total<- summary(modSR5_SC, times = c(10)*365.241)
ggplot() +
geom_step(data= m_data_m, aes(x = time, y = surv), linewidth = 1, linetype="dashed") + # Curva de supervivencia
geom_step(data= surv_data5_sc, aes(x = time/365.25, y = surv), linewidth = 1) + # Curva de supervivencia
geom_ribbon(data= surv_data5_sc, aes(x = time/365.25, y = surv, ymin = lower, ymax = upper), fill= "black", alpha = 0.2) + # Intervalos de confianza
labs(
x = "Time since discharge (years)",
y = "Relative survival",
) +
coord_cartesian(xlim = c(0, 10), ylim= c(0.6, 1.10)) +
geom_hline(yintercept = 1, linewidth = 2, linetype = "dashed", color = "gray") +
#theme_minimal() +
theme_sjPlot_manual()+
theme(
plot.title = element_text(size = 20), # Tamaño del título del gráfico
axis.title.x = element_text(size = 16), # Tamaño del título del eje x
axis.title.y = element_text(size = 16), # Tamaño del título del eje y
axis.text.x = element_text(size = 14), # Tamaño de las etiquetas del eje x
axis.text.y = element_text(size = 14) # Tamaño de las etiquetas del eje y
)Discharge age and calendar year
Code
modSR5_SC_sex <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ sex_rec, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
sn_5_sex<- summary(modSR5_SC_sex, times = c(5)*365.241)
sn_10_sex<- summary(modSR5_SC_sex, times = c(10)*365.241)
modSR5_SC_agegr <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ agegroup, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
sn_5_age<- summary(modSR5_SC_agegr, times = c(5)*365.241)
sn_10_age<- summary(modSR5_SC_agegr, times = c(10)*365.241)
modSR5_SC_res <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ res_plan, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
sn_5_res<- summary(modSR5_SC_res, times = c(5)*365.241)
sn_10_res<- summary(modSR5_SC_res, times = c(10)*365.241)
modSR5_SC_trcomp <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ tr_compliance_status, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
sn_5_trcomp<- summary(modSR5_SC_trcomp, times = c(5)*365.241)
sn_10_trcomp<- summary(modSR5_SC_trcomp, times = c(10)*365.241)
modSR5_SC_prim_licit <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ prim_sub_licit, data=clean_df_corr_surv_km,
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"),
paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age = disch_age *365.241 , sex = sex_rec , year = disch_yr))
sn_5_licit<- summary(modSR5_SC_prim_licit, times = c(5)*365.241)
sn_10_licit<- summary(modSR5_SC_prim_licit, times = c(10)*365.241)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
tibble(level = "Total", survival = sprintf("%.2f (%.2f–%.2f)",sn_5_total$surv, sn_5_total$lower, sn_5_total$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_sex$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_sex$surv, sn_5_sex$lower, sn_5_sex$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_age$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_age$surv, sn_5_age$lower, sn_5_age$upper)),
tibble(level = c("Ambulatory", "Residential"), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_res$surv, sn_5_res$lower, sn_5_res$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_licit$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_licit$surv, sn_5_licit$lower, sn_5_licit$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_trcomp$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_trcomp$surv, sn_5_trcomp$lower, sn_5_trcomp$upper))
)|>
(\(df) {
rownames(df) <- NULL
df->> netsurv_5_main
df
})() |>
knitr::kable("markdown", caption= "Net survival at 5 years of follow-up")
rbind.data.frame(
tibble(level = "Total", survival = sprintf("%.2f (%.2f–%.2f)",sn_10_total$surv, sn_10_total$lower, sn_10_total$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_sex$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_sex$surv, sn_10_sex$lower, sn_10_sex$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_age$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_age$surv, sn_10_age$lower, sn_10_age$upper)),
tibble(level = c("Ambulatory", "Residential"), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_res$surv, sn_10_res$lower, sn_10_res$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_licit$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_licit$surv, sn_10_licit$lower, sn_10_licit$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_trcomp$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_trcomp$surv, sn_10_trcomp$lower, sn_10_trcomp$upper))
)|>
(\(df) {
rownames(df) <- NULL
df->> netsurv_10_main
df
})() |>
knitr::kable("markdown", caption= "Net survival at 10 years of follow-up")| level | survival |
|---|---|
| Total | 0.97 (0.97–0.98) |
| Male | 0.97 (0.97–0.98) |
| Female | 0.97 (0.97–0.98) |
| 18-29 | 0.99 (0.99–0.99) |
| 30-44 | 0.98 (0.98–0.98) |
| 45-59 | 0.94 (0.93–0.94) |
| 60+ | 0.93 (0.91–0.96) |
| Ambulatory | 0.98 (0.97–0.98) |
| Residential | 0.97 (0.96–0.97) |
| illicit | 0.99 (0.99–0.99) |
| licit | 0.95 (0.94–0.95) |
| Completed | 0.98 (0.98–0.98) |
| Not completed | 0.97 (0.97–0.97) |
| level | survival |
|---|---|
| Total | 0.95 (0.95–0.96) |
| Male | 0.95 (0.95–0.96) |
| Female | 0.95 (0.94–0.96) |
| 18-29 | 0.98 (0.98–0.98) |
| 30-44 | 0.95 (0.95–0.96) |
| 45-59 | 0.89 (0.87–0.91) |
| 60+ | 0.83 (0.69–1.00) |
| Ambulatory | 0.96 (0.95–0.96) |
| Residential | 0.93 (0.92–0.94) |
| illicit | 0.97 (0.97–0.98) |
| licit | 0.90 (0.88–0.91) |
| Completed | 0.96 (0.95–0.97) |
| Not completed | 0.95 (0.94–0.95) |
Global survival
Approximate z-test for any pair of strata and control the family-wise error rate with Holm correction.
Code
df_netsurv_5_main<-
rbind.data.frame(
cbind.data.frame(level = "Total", survival = sn_5_total$surv, lower= sn_5_total$lower, upper= sn_5_total$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_sex$table)), survival = sn_5_sex$surv, lower= sn_5_sex$lower, upper= sn_5_sex$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_age$table)), survival = sn_5_age$surv, lower= sn_5_age$lower, upper= sn_5_age$upper),
cbind.data.frame(level = c("Ambulatory", "Residential"), survival = sn_5_res$surv, lower= sn_5_res$lower, upper=sn_5_res$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_licit$table)), survival = sn_5_licit$surv, lower= sn_5_licit$lower, upper= sn_5_licit$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_trcomp$table)), survival = sn_5_trcomp$surv, lower= sn_5_trcomp$lower, upper= sn_5_trcomp$upper)
)
df_netsurv_10_main<-
rbind.data.frame(
cbind.data.frame(level = "Total", survival = sn_10_total$surv, lower= sn_10_total$lower, upper= sn_10_total$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_sex$table)), survival = sn_10_sex$surv, lower= sn_10_sex$lower, upper= sn_10_sex$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_age$table)), survival = sn_10_age$surv, lower= sn_10_age$lower, upper= sn_10_age$upper),
cbind.data.frame(level = c("Ambulatory", "Residential"), survival = sn_10_res$surv, lower= sn_10_res$lower, upper=sn_10_res$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_licit$table)), survival = sn_10_licit$surv, lower= sn_10_licit$lower, upper= sn_10_licit$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_trcomp$table)), survival = sn_10_trcomp$surv, lower= sn_10_trcomp$lower, upper= sn_10_trcomp$upper)
)
# log(-log) transformation
df_netsurv_5_main$eta <- log(-log(df_netsurv_5_main$survival))
df_netsurv_5_main$eta_lower <- log(-log(df_netsurv_5_main$lower))
df_netsurv_5_main$eta_upper <- log(-log(df_netsurv_5_main$upper))
# standard error on the transformed scale
df_netsurv_5_main$se <- (df_netsurv_5_main$eta_upper - df_netsurv_5_main$eta_lower) / (2*1.96)
# helper to compare two rows
cmp <- function(g1, g2) {
i <- match(g1, df_netsurv_5_main$level); j <- match(g2, df_netsurv_5_main$level)
z <- (df_netsurv_5_main$eta[i] - df_netsurv_5_main$eta[j]) / sqrt(df_netsurv_5_main$se[i]^2 + df_netsurv_5_main$se[j]^2)
p <- 2*pnorm(-abs(z))
c(z = z, p = p)
}
pairs <- rbind(
c("licit", "illicit"),
c("Completed", "Not completed"),
c("Female", "Male"),
c("Ambulatory", "Residential")
)
out <- t(apply(pairs, 1, function(x) cmp(x[1], x[2])))
colnames(out) <- c("z", "raw_p")
out <- cbind.data.frame(pairs, out, holm_p = p.adjust(out[,"raw_p"], method = "holm"))
out |>
knitr::kable("markdown", caption= "Comparison by strata at 10 years of follow-up", col.names=c("Var1", "Var2", "z", "p value", "Holm-corrected"))
#La varianza de una probabilidad de supervivencia estimada (usando la fórmula de Greenwood, por ejemplo) depende de la propia probabilidad de supervivencia. Esto viola el supuesto de varianza constante (homocedasticidad) de muchas pruebas estadísticas.
#La transformación log-log complementaria (cloglog) está diseñada específicamente para estabilizar esta varianza. Hace que la varianza de la probabilidad transformada sea mucho menos dependiente de su valor. Esto hace que la prueba z sea más válida y fiable.
# log(-log) transformation
df_netsurv_10_main$eta <- log(-log(df_netsurv_10_main$survival))
df_netsurv_10_main$eta_lower <- log(-log(df_netsurv_10_main$lower))
df_netsurv_10_main$eta_upper <- log(-log(df_netsurv_10_main$upper))
# standard error on the transformed scale
df_netsurv_10_main$se <- (df_netsurv_10_main$eta_upper - df_netsurv_10_main$eta_lower) / (2*1.96)
# helper to compare two rows
cmp2 <- function(g1, g2) {
i <- match(g1, df_netsurv_10_main$level); j <- match(g2, df_netsurv_10_main$level)
z <- (df_netsurv_10_main$eta[i] - df_netsurv_10_main$eta[j]) / sqrt(df_netsurv_10_main$se[i]^2 + df_netsurv_10_main$se[j]^2)
p <- 2*pnorm(-abs(z))
c(z = z, p = p)
}
pairs <- rbind(
c("licit", "illicit"),
c("Completed", "Not completed"),
c("Female", "Male"),
c("Ambulatory", "Residential"),
c("18-29", "30-44"),
c("18-29", "45-59"),
c("18-29", "60+"),
c("30-44", "45-59"),
c("30-44", "60+"),
c("45-59", "60+")
)
out2 <- t(apply(pairs, 1, function(x) cmp2(x[1], x[2])))
colnames(out2) <- c("z", "raw_p")
out2 <- cbind.data.frame(pairs, out2, holm_p = p.adjust(out2[,"raw_p"], method = "holm"))
rownames(out2)<-NULL
if(!is_null(out2$holm_p.1)){
out2$holm_p.1 <- NULL
}
out2<-if(ncol(out2)>5){out2[,3:8]}else{out2}
out2|>
mutate(holm_p= case_when(holm_p<.001~"<.001",T~sprintf("%.3f",holm_p)))|>
knitr::kable("markdown", caption= "Comparison by strata at 10 years of follow-up", col.names=c("Var1", "Var2", "z", "p value", "Holm-corrected"))| Var1 | Var2 | z | p value | Holm-corrected |
|---|---|---|---|---|
| licit | illicit | 18.976749 | 0.0000000 | 0.0000000 |
| Completed | Not completed | -3.749014 | 0.0001775 | 0.0003551 |
| Female | Male | 0.660924 | 0.5086611 | 0.5086611 |
| Ambulatory | Residential | -3.893182 | 0.0000989 | 0.0002968 |
| Var1 | Var2 | z | p value | Holm-corrected |
|---|---|---|---|---|
| licit | illicit | 13.4513975 | 0.0000000 | <.001 |
| Completed | Not completed | -2.3341449 | 0.0195881 | 0.098 |
| Female | Male | 0.8305222 | 0.4062436 | 1.000 |
| Ambulatory | Residential | -4.9034899 | 0.0000009 | <.001 |
| 18-29 | 30-44 | -6.9018306 | 0.0000000 | <.001 |
| 18-29 | 45-59 | -12.0674056 | 0.0000000 | <.001 |
| 18-29 | 60+ | -1.5248118 | 0.1273060 | 0.509 |
| 30-44 | 45-59 | -7.2357760 | 0.0000000 | <.001 |
| 30-44 | 60+ | -0.9079985 | 0.3638790 | 1.000 |
| 45-59 | 60+ | -0.3325376 | 0.7394834 | 1.000 |
Flowchart
Code
library(DiagrammeR)
gr<-
grViz("
digraph flowchart {
graph [layout = dot, rankdir = TB]
# General node styling
node [fontname = Times, shape = rectangle, fontsize = 14, style = filled, fillcolor = transparent]
# Main flow nodes
original [label = 'Original Database\\n(n = 150,046;\\nPatients = 106,283)', fillcolor = lightgray]
c1_dataset [label = 'Database\\n(n = 146,012;\\nPatients = 103,612)']
after_discard [label = 'Database\\n(n = 88,774;\\nPatients = 88,774)']
after_discard2 [label = 'Database\\n(n = 74,470;\\nPatients = 74,470)']
final_dataset [label = 'Final Database\\n(n = 70,064;\\nPatients = 70,064)', fillcolor = lightgray]
# Discard nodes (aligned between main flow steps)
discard_referrals [label = '•Duplicates in admission age and hash key\\l and validating days in treatment (n= 54);\\l Records with unavailable missing days in\\l treatment (eg., currently in treatment): 4,007\\l•Records with negative days in treatment: 8;\\l>3 yrs. in treatment: 1,039)\\l']
discard_duplicates [label = '•Restricting treatments of patients admitted\\l between 2010-2019; having 18-64 years at\\l admission: 57,240\\l']
discard_single [label = '•Discarded (death, no tr. compliance): 14,304 \\l']
discard_single2 [label = '•Discarded missing values in sex, discharge and \\ldeath dates and negative follow-up periods: 4,406\\l']
# Invisible vertices for middle line
v1 [shape = point, width = 0, style = invis]
v2 [shape = point, width = 0, style = invis]
v3 [shape = point, width = 0, style = invis]
v4 [shape = point, width = 0, style = invis]
# Main flow edges (vertical line)
original -> v1 [arrowhead = none]
v1 -> c1_dataset
c1_dataset -> v2 [arrowhead = none]
v2 -> after_discard
after_discard ->v3 [arrowhead = none]
v3 -> after_discard2
after_discard2 -> v4 [arrowhead = none]
v4 -> final_dataset
# Discard connections (from the middle line)
v1 -> discard_referrals
v2 -> discard_duplicates
v3 -> discard_single
v4 -> discard_single2
# Alignment
{ rank = same; discard_referrals; v1 }
{ rank = same; discard_duplicates; v2 }
{ rank = same; discard_single; v3 }
{ rank = same; discard_single2; v4 }
}
",
width = 1000,
height = 1400)
grCode
unlink(paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart_files"), recursive = TRUE)
htmlwidgets::saveWidget(gr, paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.html"))
webshot::webshot(paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.html"),
paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.png"),
vwidth = 300, vheight = 300*1.2, zoom=10, expand=100) # Prueba con diferentes coordenadas top, left, width, and heightMortality causes
Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#V01–Y98
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
table(!is.na(mortality$diag2))
#1229256 102597
prop.table(table(!is.na(mortality$diag2)))*100
#92.296672 7.703328
pat_ext <- "^[VWXY](0[1-9]|[1-8][0-9]|9[0-8])"
mortality <- mortality %>% # 2) clasifica
mutate(
ext_d1 = !is.na(diag1) & str_detect(diag1, pat_ext),
ext_d2 = !is.na(diag2) & str_detect(diag2, pat_ext),
ext_any = ext_d1 | ext_d2,
miss_d1 = is.na(diag1),
miss_d2 = is.na(diag2)
)
# 3) Resumen rápido
mortality %>% summarise(
n = n(),
n_ext_d1 = sum(ext_d1),
n_ext_d2 = sum(ext_d2),
n_ext_any = sum(ext_any),
n_miss_d1 = sum(miss_d1),
n_miss_d2 = sum(miss_d2),
prop_ext_any = mean(ext_any)
)
# n n_ext_d1 n_ext_d2 n_ext_any n_miss_d1 n_miss_d2 prop_ext_any
# <int> <int> <int> <int> <int> <int> <dbl>
# 1 1331853 0 96553 96553 0 1229256 0.0725
invisible("Every external cause is in DIAG2")
niveles_icd10 <- c(
"A00–B99 Infecciosas/parasitarias",
"C00–D48 Neoplasias",
"D50–D89 Sangre/inmunidad",
"E00–E90 Endocrinas/metabólicas",
"F00–F99 Mentales y del comportamiento",
"G00–G99 Sistema nervioso",
"H00–H59 Ojo y anexos",
"H60–H95 Oído y apófisis mastoides",
"I00–I99 Circulatorio",
"J00–J99 Respiratorio",
"K00–K93 Digestivo",
"L00–L99 Piel y tejido subcutáneo",
"M00–M99 Músculo-esquelético y tejido conjuntivo",
"N00–N99 Genitourinario",
"O00–O99 Embarazo, parto y puerperio",
"P00–P96 Perinatal",
"Q00–Q99 Malformaciones congénitas",
"R00–R99 Síntomas y signos mal definidos",
"S00–T88 Lesiones, envenenamientos y otras consecuencias externas",
"V01–Y98 Causas externas de morbilidad y mortalidad",
"U00–U85 Códigos de uso especial",
"Z00–Z99 Factores que influyen en el estado de salud"
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
mortality <-
mortality |>
dplyr::mutate(mort= ifelse(hashkey %in% clean_df$hash_key,1,0)) |> #slice(-1) |>
dplyr::mutate(od = grepl("T400|T401|T403|T404|T406", diag2, ignore.case = F)) |> #Keen, C., Kinner, S. A., Young, J. T., Jang, K., Gan, W., Samji, H., Zhao, B., Krausz, M., & Slaunwhite, A. (2022). Prevalence of co-occurring mental illness and substance use disorder and association with overdose: A linked data cohort study among residents of British Columbia, Canada. Addiction, 117(1), 129-140. https://doi.org/10.1111/add.15580
#Vital statistics ICD-10 code of T40.0, T40.1, T40.3, T40.4, T40.6
#Defunciones de jóvenes (15-29 años) por causas externas de mortalidad en Chile,2017.
#https://www.conaset.cl/wp-content/uploads/2020/04/Causas_muerte_j%C3%B3venesDEIS2017.pdf
#https://apps.abacus.ai/chatllm/?appId=df4dbd254&convoId=8f2cf714
dplyr::mutate(
category = case_when(
# Suicides (X60–X84)
grepl("^X(6[0-9]|7[0-9]|8[0-4])", diag2) ~ "Intentional self-harm",
# Transport accidents (V01–V99)
grepl("^V[0-9]{2}", diag2) ~ "Transport accidents",
# Other unintentional external causes (W00–X59)
grepl("^(W[0-9]{2}|X[0-5][0-9])", diag2) ~ "Other unintentional external causes of injury",
# Assaults (X85–Y09)
grepl("^(X(8[5-9]|9[0-9])|Y0[0-9])", diag2) ~ "Assaults",
# Sequelae of external causes (Y85–Y89)
grepl("^Y8[5-9]", diag2) ~ "Other external causes", #Only 6 cases. Should be in other
# Complications of medical and surgical care (Y40–Y84)
grepl("^Y([4-7][0-9]|8[0-4])", diag2) ~ "Other external causes", #only 1 case. should be in other
nchar(diag2)<2| is.na(diag2) ~ "No external causes",
# All other codes
TRUE ~ "Other external causes"
)
)|>
mutate(chapter = case_when(
str_detect(diag1, "^R[0-9]{2}") ~ "Symptoms & signs",
str_detect(diag1, "^A|^B") ~ "Infectious & parasitic",
str_detect(diag1, "^I") ~ "Circulatory",
str_detect(diag1, "^J") ~ "Respiratory",
str_detect(diag1, "^K") ~ "Digestive",
str_detect(diag1, "^E") ~ "Endocrine & metabolic",
str_detect(diag1, "^C") ~ "Malignant neoplasms",
str_detect(diag1, "^F") ~ "Mental and behavioural",
str_detect(diag1, "^G") ~ "Nervous system",
str_detect(diag1, "^(D|H|L|M|N|O|P|Q)") ~ "Other causes",
TRUE ~ NA_character_ # if code malformed
), chapter= factor(chapter), category= factor(category)) |>
mutate(
chapter2 = case_when(
str_detect(diag1, "^[AB]") ~ niveles_icd10[1],
str_detect(diag1, "^C|^D0|^D1|^D2|^D3|^D4") ~ niveles_icd10[2],
str_detect(diag1, "^D[5-9]") ~ niveles_icd10[3],
str_detect(diag1, "^E") ~ niveles_icd10[4],
str_detect(diag1, "^F") ~ niveles_icd10[5],
str_detect(diag1, "^G") ~ niveles_icd10[6],
str_detect(diag1, "^H0|^H1|^H2|^H3|^H4|^H5") ~ niveles_icd10[7],
str_detect(diag1, "^H6|^H7|^H8|^H9") ~ niveles_icd10[8],
str_detect(diag1, "^I") ~ niveles_icd10[9],
str_detect(diag1, "^J") ~ niveles_icd10[10],
str_detect(diag1, "^K") ~ niveles_icd10[11],
str_detect(diag1, "^L") ~ niveles_icd10[12],
str_detect(diag1, "^M") ~ niveles_icd10[13],
str_detect(diag1, "^N") ~ niveles_icd10[14],
str_detect(diag1, "^O") ~ niveles_icd10[15],
str_detect(diag1, "^P") ~ niveles_icd10[16],
str_detect(diag1, "^Q") ~ niveles_icd10[17],
str_detect(diag1, "^R") ~ niveles_icd10[18],
str_detect(diag1, "^[ST]") ~ niveles_icd10[19],
str_detect(diag1, "^[VWX]") ~ niveles_icd10[20],
str_detect(diag1, "^U") ~ niveles_icd10[21],
str_detect(diag1, "^Z") ~ niveles_icd10[22],
TRUE ~ NA_character_ # códigos mal formados
),
chapter2 = factor(chapter2, levels = niveles_icd10)
)
mortality <-
mortality |>
mutate(edad_cant_cat = dplyr::case_when(
edad_cant >= 18 & edad_cant < 30 ~ "18-29",
edad_cant >= 30 & edad_cant < 45 ~ "30-44",
edad_cant >= 45 & edad_cant < 60 ~ "45-59",
edad_cant >= 60 & edad_cant < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
),
sex_rec = dplyr::case_when( # optional renaming step
sexo == 1 ~ "Male",
sexo == 2 ~ "Female",
TRUE~ NA_character_),
sex_rec= factor(sex_rec, levels = c("Male", "Female"))
) |>
filter(!is.na(sex_rec), !is.na(edad_cant_cat))
clean_df |>
left_join(mortality[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")) |> nrow()
#7065
invisible("+1 row added")
#70064
#
clean_df |>
left_join(mortality[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")) |> group_by(hash_key) |>
count() |>
filter(n>1)
cat("Exclude the duplicated mortality")
mortality_deduplicated <- mortality |>
arrange(hashkey, ano_def, mes_def, dia_def) |>
group_by(hashkey) |>
slice_head(n = 1) |>
ungroup()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_18<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="18-29")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_chap_30<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="30-44")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_chap_45<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="45-59")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_chap_60<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="60+")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_cat_18<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="18-29")$hash_key) |>
janitor::tabyl(category)
mort_cat_30<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="30-44")$hash_key) |>
janitor::tabyl(category, show_na = T)
mort_cat_45<-
mortality|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="45-59")$hash_key) |>
janitor::tabyl(category)
mort_cat_60<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, disch_age_cat=="60+")$hash_key) |>
janitor::tabyl(category, show_na = T)
mort_chap_18 |>
full_join(mort_chap_30, by="chapter") |>
full_join(mort_chap_45, by="chapter") |>
full_join(mort_chap_60, by="chapter") |>
filter(!is.na(chapter)) |>
dplyr::select(chapter, dplyr::contains("valid")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
mort_cat_18 |>
full_join(mort_cat_30, by="category") |>
full_join(mort_cat_45, by="category") |>
full_join(mort_cat_60, by="category") |>
filter(!is.na(category)) |>
dplyr::select(category, dplyr::contains("percent")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_male<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, sex_rec=="Male")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_chap_female<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, sex_rec=="Female")$hash_key) |>
janitor::tabyl(chapter, show_na = T)
mort_cat_male<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, sex_rec=="Male")$hash_key) |>
janitor::tabyl(category)
mort_cat_female<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, sex_rec=="Female")$hash_key) |>
janitor::tabyl(category, show_na = T)
mort_chap_male |>
full_join(mort_chap_female, by="chapter") |>
filter(!is.na(chapter)) |>
dplyr::select(chapter, dplyr::contains("valid")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
mort_cat_male |>
full_join(mort_cat_female, by="category") |>
filter(!is.na(category)) |>
dplyr::select(category, dplyr::contains("percent")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_amb<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, res_plan==0)$hash_key) |>
janitor::tabyl(chapter)
mort_chap_res<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, res_plan==1)$hash_key) |>
janitor::tabyl(chapter)
mort_cat_amb<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, res_plan==0)$hash_key) |>
janitor::tabyl(category)
mort_cat_res<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, res_plan==1)$hash_key) |>
janitor::tabyl(category)
mort_chap_amb |>
full_join(mort_chap_res, by="chapter") |>
filter(!is.na(chapter)) |>
dplyr::select(chapter, dplyr::contains("valid")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
mort_cat_amb |>
full_join(mort_cat_res, by="category") |>
filter(!is.na(category)) |>
dplyr::select(category, dplyr::contains("percent")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_licit<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, prim_sub_licit=="licit")$hash_key) |>
janitor::tabyl(chapter)
mort_chap_illicit<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, prim_sub_licit=="illicit")$hash_key) |>
janitor::tabyl(chapter)
mort_cat_licit<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, prim_sub_licit=="licit")$hash_key) |>
janitor::tabyl(category)
mort_cat_illicit<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, prim_sub_licit=="illicit")$hash_key) |>
janitor::tabyl(category)
mort_chap_licit |>
full_join(mort_chap_illicit, by="chapter") |>
filter(!is.na(chapter)) |>
dplyr::select(chapter, dplyr::contains("valid")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
mort_cat_licit |>
full_join(mort_cat_illicit, by="category") |>
filter(!is.na(category)) |>
dplyr::select(category, dplyr::contains("percent")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_comp<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, !grepl("Not", tr_compliance_status))$hash_key) |>
janitor::tabyl(chapter)
mort_chap_not_comp<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, grepl("Not", tr_compliance_status))$hash_key) |>
janitor::tabyl(chapter)
mort_cat_comp<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, !grepl("Not", tr_compliance_status))$hash_key) |>
janitor::tabyl(category)
mort_cat_not_comp<-
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% subset(clean_df, grepl("Not", tr_compliance_status))$hash_key) |>
janitor::tabyl(category)
mort_chap_comp |>
full_join(mort_chap_not_comp, by="chapter") |>
filter(!is.na(chapter)) |>
dplyr::select(chapter, dplyr::contains("valid")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
mort_cat_comp |>
full_join(mort_cat_not_comp, by="category") |>
filter(!is.na(category)) |>
dplyr::select(category, dplyr::contains("percent")) |>
mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |>
rio::export("clipboard")Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE
Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
chapters_table <- mort_chap_18 %>%
dplyr::select(chapter, n, valid_percent) %>%
dplyr::rename(`18-29_n` = n, `18-29` = valid_percent) %>%
full_join(mort_chap_30 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`30-44_n` = n, `30-44` = valid_percent), by = "chapter") %>%
full_join(mort_chap_45 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`45-59_n` = n, `45-59` = valid_percent), by = "chapter") %>%
full_join(mort_chap_60 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`60+_n` = n, `60+` = valid_percent), by = "chapter") %>%
full_join(mort_chap_male %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Male_n` = n, Male = valid_percent), by = "chapter") %>%
full_join(mort_chap_female %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Female_n` = n, Female = valid_percent), by = "chapter") %>%
full_join(mort_chap_amb %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Ambulatory_n` = n, Ambulatory = valid_percent), by = "chapter") %>%
full_join(mort_chap_res %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Residential_n` = n, Residential = valid_percent), by = "chapter") %>%
full_join(mort_chap_licit %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Licit_n` = n, Licit = valid_percent), by = "chapter") %>%
full_join(mort_chap_illicit %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Illicit_n` = n, Illicit = valid_percent), by = "chapter") %>%
full_join(mort_chap_comp %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Completed_n` = n, Completed = valid_percent), by = "chapter") %>%
full_join(mort_chap_not_comp %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Not_Completed_n` = n, `Not Completed` = valid_percent), by = "chapter") %>%
filter(!is.na(chapter)) %>%
mutate(
`18-29` = sprintf("%d (%.1f)", `18-29_n`, `18-29` * 100),
`30-44` = sprintf("%d (%.1f)", `30-44_n`, `30-44` * 100),
`45-59` = sprintf("%d (%.1f)", `45-59_n`, `45-59` * 100),
`60+` = sprintf("%d (%.1f)", `60+_n`, `60+` * 100),
Male = sprintf("%d (%.1f)", `Male_n`, Male * 100),
Female = sprintf("%d (%.1f)", `Female_n`, Female * 100),
Ambulatory = sprintf("%d (%.1f)", `Ambulatory_n`, Ambulatory * 100),
Residential = sprintf("%d (%.1f)", `Residential_n`, Residential * 100),
Licit = sprintf("%d (%.1f)", `Licit_n`, Licit * 100),
Illicit = sprintf("%d (%.1f)", `Illicit_n`, Illicit * 100),
Completed = sprintf("%d (%.1f)", `Completed_n`, Completed * 100),
`Not Completed` = sprintf("%d (%.1f)", `Not_Completed_n`, `Not Completed` * 100)
) %>%
dplyr::select(chapter, `18-29`, `30-44`, `45-59`, `60+`, Male, Female,
Ambulatory, Residential, Licit, Illicit, Completed, `Not Completed`)
# Create External Causes table (using percent)
categories_table <- mort_cat_18 %>%
dplyr::select(category, n, percent) %>%
dplyr::rename(`18-29_n` = n, `18-29` = percent) %>%
full_join(mort_cat_30 %>% dplyr::select(category, n, percent) %>% rename(`30-44_n` = n, `30-44` = percent), by = "category") %>%
full_join(mort_cat_45 %>% dplyr::select(category, n, percent) %>% rename(`45-59_n` = n, `45-59` = percent), by = "category") %>%
full_join(mort_cat_60 %>% dplyr::select(category, n, percent) %>% rename(`60+_n` = n, `60+` = percent), by = "category") %>%
full_join(mort_cat_male %>% dplyr::select(category, n, percent) %>% rename(`Male_n` = n, Male = percent), by = "category") %>%
full_join(mort_cat_female %>% dplyr::select(category, n, percent) %>% rename(`Female_n` = n, Female = percent), by = "category") %>%
full_join(mort_cat_amb %>% dplyr::select(category, n, percent) %>% rename(`Ambulatory_n` = n, Ambulatory = percent), by = "category") %>%
full_join(mort_cat_res %>% dplyr::select(category, n, percent) %>% rename(`Residential_n` = n, Residential = percent), by = "category") %>%
full_join(mort_cat_licit %>% dplyr::select(category, n, percent) %>% rename(`Licit_n` = n, Licit = percent), by = "category") %>%
full_join(mort_cat_illicit %>% dplyr::select(category, n, percent) %>% rename(`Illicit_n` = n, Illicit = percent), by = "category") %>%
full_join(mort_cat_comp %>% dplyr::select(category, n, percent) %>% rename(`Completed_n` = n, Completed = percent), by = "category") %>%
full_join(mort_cat_not_comp %>% dplyr::select(category, n, percent) %>% rename(`Not_Completed_n` = n, `Not Completed` = percent), by = "category") %>%
filter(!is.na(category)) %>%
mutate(
`18-29` = sprintf("%d (%.1f)", `18-29_n`, `18-29` * 100),
`30-44` = sprintf("%d (%.1f)", `30-44_n`, `30-44` * 100),
`45-59` = sprintf("%d (%.1f)", `45-59_n`, `45-59` * 100),
`60+` = sprintf("%d (%.1f)", `60+_n`, `60+` * 100),
Male = sprintf("%d (%.1f)", `Male_n`, Male * 100),
Female = sprintf("%d (%.1f)", `Female_n`, Female * 100),
Ambulatory = sprintf("%d (%.1f)", `Ambulatory_n`, Ambulatory * 100),
Residential = sprintf("%d (%.1f)", `Residential_n`, Residential * 100),
Licit = sprintf("%d (%.1f)", `Licit_n`, Licit * 100),
Illicit = sprintf("%d (%.1f)", `Illicit_n`, Illicit * 100),
Completed = sprintf("%d (%.1f)", `Completed_n`, Completed * 100),
`Not Completed` = sprintf("%d (%.1f)", `Not_Completed_n`, `Not Completed` * 100)
) %>%
dplyr::select(category, `18-29`, `30-44`, `45-59`, `60+`, Male, Female,
Ambulatory, Residential, Licit, Illicit, Completed, `Not Completed`)
# Create Table 1: ICD-10 Chapters
table1 <- kable(chapters_table,
format = "html",
col.names = c("ICD-10 Chapter", "18-29", "30-44", "45-59", "60+",
"Male", "Female", "Ambulatory", "Residential",
"Licit", "Illicit", "Completed", "Not Completed"),
caption = "Table SXX. Distribution (%) of mortality according to ICD-10 chapters stratified by treatment completion, primary substance, treatment setting, sex, and age",
escape = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE, font_size = 12) %>%
add_header_above(c(" " = 1, "Age Groups" = 4, "Sex" = 2, "Treatment Setting" = 2,
"Primary Substance" = 2, "Treatment Completion" = 2)) %>%
column_spec(1, width = "12em", bold = TRUE) %>%
column_spec(2:13, width = "6em") %>%
row_spec(0, bold = TRUE, background = "#f0f0f0")
# Create Table 2: External Causes
table2 <- kable(categories_table,
format = "html",
col.names = c("External Cause Category", "18-29", "30-44", "45-59", "60+",
"Male", "Female", "Ambulatory", "Residential",
"Licit", "Illicit", "Completed", "Not Completed"),
caption = "Table SXX. Distribution (%) of external causes of mortality by treatment completion, primary substance, treatment setting, sex, and age",
escape = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE, font_size = 12) %>%
add_header_above(c(" " = 1, "Age Groups" = 4, "Sex" = 2, "Treatment Setting" = 2,
"Primary Substance" = 2, "Treatment Completion" = 2)) %>%
column_spec(1, width = "12em", bold = TRUE) %>%
column_spec(2:13, width = "6em") %>%
row_spec(0, bold = TRUE, background = "#f0f0f0")
# Print tables
table1
table2
FALSE TRUE
1229256 102597
FALSE TRUE
92.296672 7.703328
# A tibble: 1 × 7
n n_ext_d1 n_ext_d2 n_ext_any n_miss_d1 n_miss_d2 prop_ext_any
<int> <int> <int> <int> <int> <int> <dbl>
1 1331853 0 96553 96553 0 1229256 0.0725
[1] 70065
# A tibble: 1 × 2
# Groups: hash_key [1]
hash_key n
<chr> <int>
1 2f9f74c4c0602d24c8de3e6bc473863c2bc9949ee8d31db6aad8323c6901083a 2
Exclude the duplicated mortality
| ICD-10 Chapter | 18-29 | 30-44 | 45-59 | 60+ | Male | Female | Ambulatory | Residential | Licit | Illicit | Completed | Not Completed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Circulatory | 27 (17.9) | 126 (18.9) | 216 (22.5) | 38 (23.2) | 342 (22.4) | 65 (15.8) | 355 (21.9) | 52 (16.5) | 250 (18.9) | 157 (25.6) | 117 (24.6) | 290 (19.8) |
| Digestive | 26 (17.2) | 237 (35.6) | 380 (39.7) | 61 (37.2) | 562 (36.8) | 142 (34.5) | 599 (36.9) | 105 (33.2) | 603 (45.5) | 101 (16.5) | 137 (28.8) | 567 (38.7) |
| Endocrine & metabolic | 3 (2.0) | 21 (3.2) | 15 (1.6) | 3 (1.8) | 33 (2.2) | 9 (2.2) | 32 (2.0) | 10 (3.2) | 23 (1.7) | 19 (3.1) | 8 (1.7) | 34 (2.3) |
| Infectious & parasitic | 17 (11.3) | 71 (10.7) | 38 (4.0) | 3 (1.8) | 86 (5.6) | 43 (10.5) | 99 (6.1) | 30 (9.5) | 52 (3.9) | 77 (12.6) | 31 (6.5) | 98 (6.7) |
| Malignant neoplasms | 25 (16.6) | 50 (7.5) | 115 (12.0) | 30 (18.3) | 161 (10.5) | 59 (14.4) | 191 (11.8) | 29 (9.2) | 127 (9.6) | 93 (15.2) | 69 (14.5) | 151 (10.3) |
| Mental and behavioural | 3 (2.0) | 18 (2.7) | 25 (2.6) | 3 (1.8) | 45 (2.9) | 4 (1.0) | 40 (2.5) | 9 (2.8) | 38 (2.9) | 11 (1.8) | 9 (1.9) | 40 (2.7) |
| Nervous system | 7 (4.6) | 19 (2.9) | 19 (2.0) | 2 (1.2) | 39 (2.6) | 8 (1.9) | 38 (2.3) | 9 (2.8) | 25 (1.9) | 22 (3.6) | 9 (1.9) | 38 (2.6) |
| Other causes | 11 (7.3) | 16 (2.4) | 20 (2.1) | 6 (3.7) | 42 (2.7) | 11 (2.7) | 45 (2.8) | 8 (2.5) | 28 (2.1) | 25 (4.1) | 22 (4.6) | 31 (2.1) |
| Respiratory | 17 (11.3) | 63 (9.5) | 90 (9.4) | 11 (6.7) | 145 (9.5) | 36 (8.8) | 138 (8.5) | 43 (13.6) | 115 (8.7) | 66 (10.8) | 45 (9.5) | 136 (9.3) |
| Symptoms & signs | 15 (9.9) | 45 (6.8) | 40 (4.2) | 7 (4.3) | 73 (4.8) | 34 (8.3) | 86 (5.3) | 21 (6.6) | 65 (4.9) | 42 (6.9) | 28 (5.9) | 79 (5.4) |
| External Cause Category | 18-29 | 30-44 | 45-59 | 60+ | Male | Female | Ambulatory | Residential | Licit | Illicit | Completed | Not Completed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Assaults | 55 (13.4) | 54 (4.6) | 11 (0.9) | 0 (0.0) | 103 (4.3) | 17 (2.8) | 90 (3.6) | 30 (5.9) | 20 (1.1) | 100 (8.0) | 9 (1.3) | 111 (4.8) |
| Intentional self-harm | 99 (24.1) | 220 (18.6) | 74 (6.1) | 9 (4.7) | 323 (13.5) | 79 (13.2) | 333 (13.4) | 69 (13.6) | 129 (7.4) | 273 (21.8) | 72 (10.7) | 330 (14.2) |
| No external causes | 154 (37.5) | 678 (57.4) | 982 (81.0) | 168 (88.4) | 1560 (65.1) | 422 (70.3) | 1660 (66.7) | 322 (63.4) | 1352 (77.6) | 630 (50.2) | 492 (73.3) | 1490 (64.1) |
| Other external causes | 1 (0.2) | 3 (0.3) | 3 (0.2) | 0 (0.0) | 6 (0.3) | 1 (0.2) | 6 (0.2) | 1 (0.2) | 2 (0.1) | 5 (0.4) | 1 (0.1) | 6 (0.3) |
| Other unintentional external causes of injury | 59 (14.4) | 145 (12.3) | 107 (8.8) | 12 (6.3) | 271 (11.3) | 52 (8.7) | 265 (10.7) | 58 (11.4) | 174 (10.0) | 149 (11.9) | 65 (9.7) | 258 (11.1) |
| Transport accidents | 43 (10.5) | 82 (6.9) | 36 (3.0) | 1 (0.5) | 133 (5.6) | 29 (4.8) | 134 (5.4) | 28 (5.5) | 65 (3.7) | 97 (7.7) | 32 (4.8) | 130 (5.6) |
Code
mx_1x1_comp<-
rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1),
cbind.data.frame(sex="female", fltper_1x1))
mx_1x1_comp$Age<- as.numeric(mx_1x1_comp$Age)Warning: NAs introducidos por coerción
Code
mx_1x1_comp_filt<-mx_1x1_comp[as.numeric(as.character(mx_1x1_comp$Year)) %in% years_followup,]
mx_1x1_comp_filt2<-mx_1x1_comp_filt[as.numeric(as.character(mx_1x1_comp_filt$Age)) %in%
min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]
order_levels <- c(
"Infectious & parasitic",
"Malignant neoplasms",
"Endocrine & metabolic",
"Mental and behavioral",
"Nervous system",
"Circulatory",
"Respiratory",
"Digestive",
"Symptoms & signs",
"Other underlying causes"
)
cat("Mortality SUD population, underlying\n")
mortality_deduplicated|>
dplyr::filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
dplyr::filter(hashkey %in% clean_df$hash_key) |>
dplyr::mutate(
chapter = recode(chapter,
"Mental and behavioural" = "Mental and behavioral",
"Other causes" = "Other underlying causes"
),
chapter = factor(chapter, levels = order_levels, ordered = TRUE)
) %>%
dplyr::arrange(chapter) |>
janitor::tabyl(chapter, show_na = T) |>
mutate(percent= sprintf("%1.1f",100*percent)) |>
pull(percent)
sum(as.numeric(mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(chapter, show_na = T) |>
mutate(percent= sprintf("%1.3f",100*percent)) |>
pull(percent))[-11])
cat("Mortality SUD population, external\n")
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(category, show_na = T) |>
mutate(percent= sprintf("%1.1f",100*percent)) |>
pull(percent)
sum(as.numeric(mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(category, show_na = T) |>
mutate(percent= sprintf("%1.3f",100*percent)) |> pull(percent))[-3])
cat("Mortality gral population, underlying\n")
mortality_deduplicated|>
dplyr::filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
#dplyr::filter(hashkey %in% clean_df$hash_key) |>
dplyr::mutate(
chapter = recode(chapter,
"Mental and behavioural" = "Mental and behavioral",
"Other causes" = "Other underlying causes"
),
chapter = factor(chapter, levels = order_levels, ordered = TRUE)
) %>%
dplyr::arrange(chapter) |>
janitor::tabyl(chapter, show_na = T) |>
mutate(percent= sprintf("%1.1f",100*percent)) |>
pull(percent)
sum(as.numeric(mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
#filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(chapter, show_na = T) |>
mutate(percent= sprintf("%1.3f",100*percent)) |>
pull(percent))[-11])
cat("Mortality gral population, external\n")
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
#filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(category, show_na = T) |>
mutate(percent= sprintf("%1.1f",100*percent))
sum(as.numeric(
mortality_deduplicated|>
filter(death_date>="2010-01-01", death_date<"2021-01-01")|>
#filter(hashkey %in% clean_df$hash_key) |>
janitor::tabyl(category, show_na = T) |>
mutate(percent= sprintf("%1.2f",100*percent)) |> pull(percent))[-3])
mortality_summary <-
mortality_deduplicated |>
filter(ano_def>=2010) |>
filter(!is.na(category)) |>
#filter(!grepl("Other causes",category)) |>
group_by(ano_def, sex_rec, edad_cant) |>
summarise(assaults=sum(category=="Assaults", na.rm=T),
self_harm=sum(category=="Intentional self-harm", na.rm=T),
other_causes=sum(category=="No external causes", na.rm=T),
other_ext_causes= sum(category=="Other external causes", na.rm=T),
other_ext_causes_unint_inj= sum(grepl("Other unintentional",category), na.rm=T),
transport_accidents= sum(grepl("Transport",category), na.rm=T)
)Code
mx_1x1_comp_filt3 <-
mx_1x1_comp_filt2|>
mutate(edad_cant_cat = dplyr::case_when(
Age>= 18 & Age < 30 ~ "18-29",
Age>= 30 & Age < 45 ~ "30-44",
Age>= 45 & Age < 60 ~ "45-59",
Age>= 60 & Age < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
),
sex_rec = dplyr::case_when( # optional renaming step
sex == "male" ~ "Male",
sex == "female" ~ "Female",
TRUE~ NA_character_),
sex_rec= factor(sex_rec, levels = c("Male", "Female"))
)|>
left_join(mortality_summary, by= c("Year"="ano_def", "sex_rec"= "sex_rec", "Age"= "edad_cant"))|>
group_by(Year, sex_rec, edad_cant_cat)|>
summarise(
sum_assaults = sum(assaults),
sum_self_harm = sum(self_harm),
sum_other_causes = sum(other_causes),
sum_other_ext_causes = sum(other_ext_causes),
sum_other_unint_ext_causes = sum(other_ext_causes_unint_inj),
sum_transport_accidents = sum(transport_accidents),
Lx_total = sum(Lx) # Sumar los años-persona
) |>
mutate(haz_assaults= sum_assaults/Lx_total,
haz_self_harm= sum_self_harm/Lx_total,
haz_other_causes= sum_other_causes/Lx_total,
haz_other_ext_causes= sum_other_ext_causes/Lx_total,
haz_other_unint_ext_causes= sum_other_unint_ext_causes/Lx_total,
haz_transport_accidents= sum_transport_accidents/Lx_total) Code
mx_1x1_comp_filt3 <-
mx_1x1_comp_filt3|>
mutate(agegroup = dplyr::case_when(
grepl("18",edad_cant_cat)~ 18,
grepl("30",edad_cant_cat)~ 30,
grepl("45",edad_cant_cat)~ 45,
grepl("60",edad_cant_cat)~ 60,
TRUE ~ NA_real_ # Opcional: manejo de valores fuera de rango
))|>
rename("year"="Year", "sex"="sex_rec")
c_SISTRAT_c1_assaults <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = category=="Assaults",
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_assaults_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_assaults, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_assaults',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_self_harm <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = category=="Intentional self-harm",
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_self_harm_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_self_harm, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_self_harm',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_transport <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = category=="Transport accidents",
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_accidents_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_transport, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_transport_accidents',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_other_ext <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = category=="Other external causes",
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_ext_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_other_ext_causes',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_other_unint_ext <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Other unintentional",category),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_unint_ext_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other_unint_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_other_unint_ext_causes',
adjust = c('agegroup','year','sex'),
EAR=T)
sr_1_other_ext_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_other_ext_causes',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_other <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = category=="No external causes",
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_causes_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
ref.rate = 'haz_other_causes',
adjust = c('agegroup','year','sex'),
EAR=T)
sir_assaults_df<-
cbind.data.frame(
total= "Assaults/ Aggressions (X85–Y09)",
observed= round(sr_1_assaults_df$observed,0),
pyrs= round(sr_1_assaults_df$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_assaults_df$observed, sr_1_assaults_df$pyrs, phi= 1))))),
expected= round(sr_1_assaults_df$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_assaults_df, phi= extract_phi(c_SISTRAT_c1_assaults))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sr_1_assaults_df$EAR)),
phi=extract_phi(c_SISTRAT_c1_assaults))Warning: glm.fit: fitted rates numerically 0 occurred
Warning: glm.fit: fitted rates numerically 0 occurred
Code
sir_self_harm_df<-
cbind.data.frame(
total= "Intentional self-harm (X60–X84)",
observed= round(sr_1_self_harm_df$observed,0),
pyrs= round(sr_1_self_harm_df$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_self_harm_df$observed, sr_1_self_harm_df$pyrs, phi= 1))))),
expected= round(sr_1_self_harm_df$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_self_harm_df, phi= extract_phi(c_SISTRAT_c1_self_harm))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sr_1_self_harm_df$EAR)),
phi=extract_phi(c_SISTRAT_c1_self_harm))
sir_other_unint_ext_df<-
cbind.data.frame(
total= "Other unintentional external causes of injury",
observed= round(sr_1_other_unint_ext_df$observed,0),
pyrs= round(sr_1_other_unint_ext_df$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_other_unint_ext_df$observed, sr_1_other_unint_ext_df$pyrs, phi= 1))))),
expected= round(sr_1_other_unint_ext_df$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_other_unint_ext_df, phi= extract_phi(c_SISTRAT_c1_other_unint_ext))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sr_1_other_unint_ext_df$EAR)),
phi=extract_phi(c_SISTRAT_c1_other_unint_ext))
sir_other_df<-
cbind.data.frame(
total= "No external causes",
observed= round(sr_1_other_causes_df$observed,0),
pyrs= round(sr_1_other_causes_df$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_other_causes_df$observed, sr_1_other_causes_df$pyrs, phi= 1))))),
expected= round(sr_1_other_causes_df$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_other_causes_df, phi= extract_phi(c_SISTRAT_c1_other))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sr_1_other_causes_df$EAR)),
phi=extract_phi(c_SISTRAT_c1_other))
sir_transport_df<-
cbind.data.frame(
total= "Transport accidents",
observed= round(sr_1_accidents_df$observed,0),
pyrs= round(sr_1_accidents_df$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_accidents_df$observed, sr_1_accidents_df$pyrs, phi= 1))))),
expected= round(sr_1_accidents_df$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_accidents_df, phi= extract_phi(c_SISTRAT_c1_transport))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sr_1_accidents_df$EAR)),
phi=extract_phi(c_SISTRAT_c1_transport))
#c_SISTRAT_c1_assaults c_SISTRAT_c1_self_harm c_SISTRAT_c1_transport c_SISTRAT_c1_other_ext c_SISTRAT_c1_other
#sr_1_assaults_df sr_1_self_harm_df sr_1_accidents_df sr_1_other_unint_ext_df sr_1_other_causes_df
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sir_assaults_df, sir_self_harm_df, sir_other_unint_ext_df, sir_other_df, sir_transport_df)|>
rename("Characteristic"="total")|>
(\(df) {
df->> df_smr_ind_ext
df
})()|>
extract(
SMR,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
mutate(obs_exp= paste0(observed, "/", expected)) |>
knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. External causes")Mortality SUD population, underlying
[1] "4.3" "7.3" "1.4" "1.6" "1.6" "13.6" "6.0" "23.5" "3.6" "1.8"
[11] "35.3"
[1] 64.72
Mortality SUD population, external
[1] "4.0" "13.4" "66.2" "0.2" "10.8" "5.4"
[1] 33.845
Mortality gral population, underlying
[1] "2.4" "28.4" "4.9" "1.3" "3.1" "25.1" "8.4" "8.4" "1.8" "5.7"
[11] "10.5"
[1] 89.503
Mortality gral population, external
category n percent
Assaults 6851 0.8
Intentional self-harm 19326 2.3
No external causes 780325 91.3
Other external causes 1552 0.2
Other unintentional external causes of injury 26382 3.1
Transport accidents 20707 2.4
[1] 8.75
Dispersion-corrected 95% confidence intervals
| Characteristic | observed | pyrs | CMR_1000 | expected | EAR | phi | SMR_dir | obs_exp |
|---|---|---|---|---|---|---|---|---|
| Assaults/ Aggressions (X85–Y09) | 120 | 353826 | 0.3 (0.3–0.4) | 33 | 0.25 | 0.8608439 | 3.63 (3.07–4.28) | 120/33 |
| Intentional self-harm (X60–X84) | 402 | 353826 | 1.1 (1.0–1.3) | 82 | 0.90 | 1.0840996 | 4.91 (4.43–5.44) | 402/82 |
| Other unintentional external causes of injury | 323 | 353826 | 0.9 (0.8–1.0) | 82 | 0.68 | 1.9441330 | 3.95 (3.39–4.60) | 323/82 |
| No external causes | 1982 | 353826 | 5.6 (5.4–5.9) | 739 | 3.51 | 1.3468431 | 2.68 (2.55–2.82) | 1982/739 |
| Transport accidents | 162 | 353826 | 0.5 (0.4–0.5) | 78 | 0.24 | 0.9231048 | 2.09 (1.80–2.42) | 162/78 |
Code
r2_adj_assaults <-
rate(
data = c_SISTRAT_c1_assaults,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_assaults <- mapply(
dsr_format_corr, # FUN
r2_adj_assaults$rate.adj, # primer vector (rate)
r2_adj_assaults$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
#phi = extract_phi_dir(c_SISTRAT_c1_assaults),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_self_harm <-
rate(
data = c_SISTRAT_c1_self_harm,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_self_harm <- mapply(
dsr_format_corr, # FUN
r2_adj_self_harm$rate.adj, # primer vector (rate)
r2_adj_self_harm$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_self_harm),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_transport <-
rate(
data = c_SISTRAT_c1_transport,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_transport <- mapply(
dsr_format_corr, # FUN
r2_adj_transport$rate.adj, # primer vector (rate)
r2_adj_transport$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_transport),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_other_unint_ext <-
rate(
data = c_SISTRAT_c1_other_unint_ext,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_other_unint_ext <- mapply(
dsr_format_corr, # FUN
r2_adj_other_unint_ext$rate.adj, # primer vector (rate)
r2_adj_other_unint_ext$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_other_unint_ext),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_other <-
rate(
data = c_SISTRAT_c1_other,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_other <- mapply(
dsr_format_corr, # FUN
r2_adj_other$rate.adj, # primer vector (rate)
r2_adj_other$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_other),
factor = 1e3,
digits = 6,
conf = 0.95))
tasa_ponderada <- mx_1x1_comp_filt3 |>
ungroup() |>
left_join(
weights_df,
by = c("year", "agegroup", "sex")) |>
summarise(
haz_assaults_w = sum(haz_assaults * weights),
haz_self_harm_w = sum(haz_self_harm * weights),
haz_other_causes_w = sum(haz_other_causes * weights),
haz_other_unint_ext_causes_w = sum(haz_other_unint_ext_causes * weights),
haz_transport_accidents_w = sum(haz_transport_accidents * weights)
) *1e3
round(tasa_ponderada,2)
rbind.data.frame(
cbind.data.frame(var="Assaults/ Aggressions", t(r2_adj_assaults[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_assaults),
cbind.data.frame(var="Intentional self-harm", t(r2_adj_self_harm[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_self_harm),
cbind.data.frame(var="Other unintentional external causes", t(r2_adj_other_unint_ext[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other_unint_ext),
cbind.data.frame(var="No external causes", t(r2_adj_other[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other),
cbind.data.frame(var="Transport accidents", t(r2_adj_transport[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_transport)
)|>
(\(df) {
df->> df_smr_dir_ext
df
})()|>
mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|>
mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
dplyr::select(-any_of(2:7))|>
extract(
SMR_dir,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|>
knitr::kable("markdown", caption= "SMRs, direct method, last treatment followed") haz_assaults_w haz_self_harm_w haz_other_causes_w
1 0.06 0.17 2.71
haz_other_unint_ext_causes_w haz_transport_accidents_w
1 0.17 0.17
| var | CMR | DSR | DSR (SEs robust to dispersion) |
|---|---|---|---|
| Assaults/ Aggressions | 0.3 (0.3–0.4) | 0.3 (0.2–0.4) | 0.3 (0.2–0.4) |
| Intentional self-harm | 1.1 (1.0–1.3) | 1.0 (0.8–1.2) | 1.0 (0.8–1.2) |
| Other unintentional external causes | 0.9 (0.8–1.0) | 3.3 (0.8–14.3) | 3.3 (0.4–24.3) |
| No external causes | 5.6 (5.4–5.9) | 8.1 (7.1–9.3) | 8.1 (7.1–9.3) |
| Transport accidents | 0.5 (0.4–0.5) | 0.4 (0.3–0.6) | 0.4 (0.3–0.5) |
Code
# Create mortality summary by disease category
mortality_summary2 <-
mortality_deduplicated |>
filter(ano_def>=2010) |>
filter(!is.na(category)) |>
#filter(!grepl("Other causes",category)) |>
group_by(ano_def, sex_rec, edad_cant) |>
summarise(circulatory=sum(chapter=="Circulatory", na.rm=T),
digestive=sum(chapter=="Digestive", na.rm=T),
endocrine_metabolic= sum(grepl("Endocrine", chapter), na.rm=T),
infectious_parasitic= sum(grepl("parasitic", chapter), na.rm=T),
malignant_neoplasms= sum(grepl("neoplasms", chapter), na.rm=T),
mental= sum(grepl("Mental", chapter), na.rm=T),
nervous= sum(grepl("Nervous", chapter), na.rm=T),
other_causes= sum(grepl("Other", chapter), na.rm=T),
respiratory= sum(grepl("Respiratory",chapter), na.rm=T),
symptoms_signs= sum(grepl("Symptoms",chapter), na.rm=T)
)Code
# Create hazard rates dataset with age categories and disease-specific hazards
mx_1x1_comp_filt3b <-
mx_1x1_comp_filt2|>
mutate(edad_cant_cat = dplyr::case_when(
Age>= 18 & Age < 30 ~ "18-29",
Age>= 30 & Age < 45 ~ "30-44",
Age>= 45 & Age < 60 ~ "45-59",
Age>= 60 & Age < 86 ~ "60+",
TRUE ~ NA_character_ # Opcional: manejo de valores fuera de rango
),
sex_rec = dplyr::case_when( # optional renaming step
sex == "male" ~ "Male",
sex == "female" ~ "Female",
TRUE~ NA_character_),
sex_rec= factor(sex_rec, levels = c("Male", "Female"))
)|>
left_join(mortality_summary2, by= c("Year"="ano_def", "sex_rec"= "sex_rec", "Age"= "edad_cant"))|>
group_by(Year, sex_rec, edad_cant_cat)|>
summarise(
sum_circulatory = sum(circulatory, na.rm=T),
sum_digestive = sum(digestive, na.rm=T),
sum_endocrine_metabolic = sum(endocrine_metabolic, na.rm=T),
sum_infectious_parasitic = sum(infectious_parasitic, na.rm=T),
sum_malignant_neoplasms = sum(malignant_neoplasms, na.rm=T),
sum_mental = sum(mental, na.rm=T),
sum_nervous = sum(nervous, na.rm=T),
sum_other_causes = sum(other_causes, na.rm=T),
sum_respiratory = sum(respiratory, na.rm=T),
sum_symptoms_signs = sum(symptoms_signs, na.rm=T),
Lx_total = sum(Lx) # Sumar los años-persona
) |>
mutate(haz_circulatory= sum_circulatory/Lx_total,
haz_digestive= sum_digestive/Lx_total,
haz_endocrine_metabolic= sum_endocrine_metabolic/Lx_total,
haz_infectious_parasitic= sum_infectious_parasitic/Lx_total,
haz_malignant_neoplasms= sum_malignant_neoplasms/Lx_total,
haz_mental= sum_mental/Lx_total,
haz_nervous= sum_nervous/Lx_total,
haz_other_causes= sum_other_causes/Lx_total,
haz_respiratory= sum_respiratory/Lx_total,
haz_symptoms_signs= sum_symptoms_signs/Lx_total
)Code
# Add agegroup variable and rename columns for analysis
mx_1x1_comp_filt3b <-
mx_1x1_comp_filt3b|>
mutate(agegroup = dplyr::case_when(
grepl("18",edad_cant_cat)~ 18,
grepl("30",edad_cant_cat)~ 30,
grepl("45",edad_cant_cat)~ 45,
grepl("60",edad_cant_cat)~ 60,
TRUE ~ NA_real_ # Opcional: manejo de valores fuera de rango
))|>
rename("year"="Year", "sex"="sex_rec")
c_SISTRAT_c1_circulatory <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Circulatory",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_circulatory_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_circulatory, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_circulatory',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_digestive <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Digestive",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_digestive_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_digestive, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_digestive',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_endocrine_metabolic <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Endocrine",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_endocrine_metabolic_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_endocrine_metabolic, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_endocrine_metabolic',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_infectious_parasitic <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("parasitic",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_infectious_parasitic_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_infectious_parasitic, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_infectious_parasitic',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_malignant_neoplasms <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("neoplasms",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_malignant_neoplasms_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_malignant_neoplasms, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_malignant_neoplasms',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_mental <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Mental",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_mental_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_mental, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_mental',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_nervous <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Nervous",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_nervous_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_nervous, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_nervous',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_other_causes <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Other",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_causes_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other_causes, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_other_causes',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_respiratory <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Respiratory",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_respiratory_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_respiratory, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_respiratory',
adjust = c('agegroup','year','sex'),
EAR=T)
c_SISTRAT_c1_symptoms_signs <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
status = grepl("Symptoms",chapter),
birth = birth_date_rec,
exit = death_date_rec, entry = disch_date_rec6,
#2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
breaks = list(per = seq(2010, 2021, by = 1),
#2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_symptoms_signs_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_symptoms_signs, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
ref.rate = 'haz_symptoms_signs',
adjust = c('agegroup','year','sex'),
EAR=T)
sir_circulatory_df <-
cbind.data.frame(
total = "Circulatory System Diseases",
observed = round(sr_1_circulatory_df$observed, 0),
pyrs = round(sr_1_circulatory_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_circulatory_df$observed, sr_1_circulatory_df$pyrs, phi = 1))))),
expected = round(sr_1_circulatory_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_circulatory_df, phi = extract_phi(c_SISTRAT_c1_circulatory))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_circulatory_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_circulatory))
sir_digestive_df <-
cbind.data.frame(
total = "Digestive System Diseases",
observed = round(sr_1_digestive_df$observed, 0),
pyrs = round(sr_1_digestive_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_digestive_df$observed, sr_1_digestive_df$pyrs, phi = 1))))),
expected = round(sr_1_digestive_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_digestive_df, phi = extract_phi(c_SISTRAT_c1_digestive))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_digestive_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_digestive))
sir_endocrine_metabolic_df <-
cbind.data.frame(
total = "Endocrine and Metabolic Diseases",
observed = round(sr_1_endocrine_metabolic_df$observed, 0),
pyrs = round(sr_1_endocrine_metabolic_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_endocrine_metabolic_df$observed, sr_1_endocrine_metabolic_df$pyrs, phi = 1))))),
expected = round(sr_1_endocrine_metabolic_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_endocrine_metabolic_df, phi = extract_phi(c_SISTRAT_c1_endocrine_metabolic))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_endocrine_metabolic_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_endocrine_metabolic))
sir_infectious_parasitic_df <-
cbind.data.frame(
total = "Infectious and Parasitic Diseases",
observed = round(sr_1_infectious_parasitic_df$observed, 0),
pyrs = round(sr_1_infectious_parasitic_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_infectious_parasitic_df$observed, sr_1_infectious_parasitic_df$pyrs, phi = 1))))),
expected = round(sr_1_infectious_parasitic_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_infectious_parasitic_df, phi = extract_phi(c_SISTRAT_c1_infectious_parasitic))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_infectious_parasitic_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_infectious_parasitic))
sir_malignant_neoplasms_df <-
cbind.data.frame(
total = "Malignant neoplasms",
observed = round(sr_1_malignant_neoplasms_df$observed, 0),
pyrs = round(sr_1_malignant_neoplasms_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_malignant_neoplasms_df$observed, sr_1_malignant_neoplasms_df$pyrs, phi = 1))))),
expected = round(sr_1_malignant_neoplasms_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_malignant_neoplasms_df, phi = extract_phi(c_SISTRAT_c1_malignant_neoplasms))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_malignant_neoplasms_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_malignant_neoplasms))
sir_mental_df <-
cbind.data.frame(
total = "Mental and behavioral",
observed = round(sr_1_mental_df$observed, 0),
pyrs = round(sr_1_mental_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_mental_df$observed, sr_1_mental_df$pyrs, phi = 1))))),
expected = round(sr_1_mental_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_mental_df, phi = extract_phi(c_SISTRAT_c1_mental))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_mental_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_mental))
sir_nervous_df <-
cbind.data.frame(
total = "Nervous system",
observed = round(sr_1_nervous_df$observed, 0),
pyrs = round(sr_1_nervous_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_nervous_df$observed, sr_1_nervous_df$pyrs, phi = 1))))),
expected = round(sr_1_nervous_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_nervous_df, phi = extract_phi(c_SISTRAT_c1_nervous))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_nervous_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_nervous))
sir_other_causes_df <-
cbind.data.frame(
total = "Other causes",
observed = round(sr_1_other_causes_df$observed, 0),
pyrs = round(sr_1_other_causes_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_other_causes_df$observed, sr_1_other_causes_df$pyrs, phi = 1))))),
expected = round(sr_1_other_causes_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_other_causes_df, phi = extract_phi(c_SISTRAT_c1_other_causes))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_other_causes_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_other_causes))
sir_respiratory_df <-
cbind.data.frame(
total = "Respiratory System Diseases",
observed = round(sr_1_respiratory_df$observed, 0),
pyrs = round(sr_1_respiratory_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_respiratory_df$observed, sr_1_respiratory_df$pyrs, phi = 1))))),
expected = round(sr_1_respiratory_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_respiratory_df, phi = extract_phi(c_SISTRAT_c1_respiratory))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_respiratory_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_respiratory))
sir_symptoms_signs_df <-
cbind.data.frame(
total = "Symptoms, Signs and Abnormal Findings",
observed = round(sr_1_symptoms_signs_df$observed, 0),
pyrs = round(sr_1_symptoms_signs_df$pyrs, 0),
CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_symptoms_signs_df$observed, sr_1_symptoms_signs_df$pyrs, phi = 1))))),
expected = round(sr_1_symptoms_signs_df$expected, 0),
SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_symptoms_signs_df, phi = extract_phi(c_SISTRAT_c1_symptoms_signs))[, 1:3])))),
EAR = as.character(sprintf("%.2f", sr_1_symptoms_signs_df$EAR)),
phi = extract_phi(c_SISTRAT_c1_symptoms_signs))
map <- c(
"Infectious and Parasitic Diseases" = "Infectious & parasitic",
"Malignant neoplasms" = "Malignant neoplasms",
"Endocrine and Metabolic Diseases" = "Endocrine & metabolic",
"Mental and behavioral" = "Mental and behavioral",
"Nervous system" = "Nervous system",
"Circulatory System Diseases" = "Circulatory",
"Respiratory System Diseases" = "Respiratory",
"Digestive System Diseases" = "Digestive",
"Symptoms, Signs and Abnormal Findings" = "Symptoms & signs",
"Other causes" = "Other underlying causes"
)
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sir_circulatory_df, sir_digestive_df, sir_endocrine_metabolic_df,
sir_infectious_parasitic_df, sir_malignant_neoplasms_df, sir_mental_df, sir_nervous_df, sir_other_causes_df, sir_respiratory_df, sir_symptoms_signs_df)|>
rename("Characteristic"="total")|>
(\(df) {
df->> df_smr_ind_non_ext
df
})()|>
tidyr::extract(
SMR,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
dplyr::mutate(obs_exp= paste0(observed, "/", expected))|>
dplyr::mutate(key = map[Characteristic],
ord = match(key, order_levels))|>
dplyr::arrange(ord)|>
dplyr::select(-key, -ord)|>
knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. Non-external causes")Dispersion-corrected 95% confidence intervals
| Characteristic | observed | pyrs | CMR_1000 | expected | EAR | phi | SMR_dir | obs_exp |
|---|---|---|---|---|---|---|---|---|
| Infectious and Parasitic Diseases | 129 | 353826 | 0.4 (0.3–0.4) | 43 | 0.24 | 0.6533812 | 2.97 (2.58–3.41) | 129/43 |
| Malignant neoplasms | 220 | 353826 | 0.6 (0.5–0.7) | 205 | 0.04 | 1.0672357 | 1.07 (0.94–1.23) | 220/205 |
| Endocrine and Metabolic Diseases | 42 | 353826 | 0.1 (0.1–0.2) | 28 | 0.04 | 0.5089105 | 1.52 (1.23–1.89) | 42/28 |
| Mental and behavioral | 49 | 353826 | 0.1 (0.1–0.2) | 7 | 0.12 | 0.4737777 | 6.71 (5.53–8.13) | 49/7 |
| Nervous system | 47 | 353826 | 0.1 (0.1–0.2) | 27 | 0.06 | 0.7932892 | 1.74 (1.35–2.25) | 47/27 |
| Circulatory System Diseases | 407 | 353826 | 1.2 (1.0–1.3) | 170 | 0.67 | 0.7945663 | 2.40 (2.20–2.62) | 407/170 |
| Respiratory System Diseases | 181 | 353826 | 0.5 (0.4–0.6) | 45 | 0.38 | 0.7598600 | 4.06 (3.57–4.60) | 181/45 |
| Digestive System Diseases | 704 | 353826 | 2.0 (1.8–2.1) | 113 | 1.67 | 1.5908436 | 6.22 (5.67–6.83) | 704/113 |
| Symptoms, Signs and Abnormal Findings | 107 | 353826 | 0.3 (0.3–0.4) | 27 | 0.22 | 0.6869083 | 3.89 (3.33–4.56) | 107/27 |
| Other causes | 53 | 353826 | 0.1 (0.1–0.2) | 42 | 0.03 | 1.1576887 | 1.25 (0.94–1.67) | 53/42 |
Code
r2_adj_circulatory <-
rate(
data = c_SISTRAT_c1_circulatory,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_circulatory <- mapply(
dsr_format_corr, # FUN
r2_adj_circulatory$rate.adj, # primer vector (rate)
r2_adj_circulatory$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_circulatory),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_digestive <-
rate(
data = c_SISTRAT_c1_digestive,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_digestive <- mapply(
dsr_format_corr, # FUN
r2_adj_digestive$rate.adj, # primer vector (rate)
r2_adj_digestive$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_digestive),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_endocrine_metabolic <-
rate(
data = c_SISTRAT_c1_endocrine_metabolic,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_endocrine_metabolic <- mapply(
dsr_format_corr, # FUN
r2_adj_endocrine_metabolic$rate.adj, # primer vector (rate)
r2_adj_endocrine_metabolic$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_endocrine_metabolic),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_infectious_parasitic <-
rate(
data = c_SISTRAT_c1_infectious_parasitic,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_infectious_parasitic <- mapply(
dsr_format_corr, # FUN
r2_adj_infectious_parasitic$rate.adj, # primer vector (rate)
r2_adj_infectious_parasitic$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_infectious_parasitic),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_malignant_neoplasms <-
rate(
data = c_SISTRAT_c1_malignant_neoplasms,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_malignant_neoplasms <- mapply(
dsr_format_corr, # FUN
r2_adj_malignant_neoplasms$rate.adj, # primer vector (rate)
r2_adj_malignant_neoplasms$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_malignant_neoplasms),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_mental <-
rate(
data = c_SISTRAT_c1_mental,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_mental <- mapply(
dsr_format_corr, # FUN
r2_adj_mental$rate.adj, # primer vector (rate)
r2_adj_mental$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_mental),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_nervous <-
rate(
data = c_SISTRAT_c1_nervous,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_nervous <- mapply(
dsr_format_corr, # FUN
r2_adj_nervous$rate.adj, # primer vector (rate)
r2_adj_nervous$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_nervous),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_other_causes <-
rate(
data = c_SISTRAT_c1_other_causes,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_other_causes <- mapply(
dsr_format_corr, # FUN
r2_adj_other_causes$rate.adj, # primer vector (rate)
r2_adj_other_causes$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_other_causes),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_respiratory <-
rate(
data = c_SISTRAT_c1_respiratory,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_respiratory <- mapply(
dsr_format_corr, # FUN
r2_adj_respiratory$rate.adj, # primer vector (rate)
r2_adj_respiratory$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_respiratory),
factor = 1e3,
digits = 6,
conf = 0.95))
r2_adj_symptoms_signs <-
rate(
data = c_SISTRAT_c1_symptoms_signs,
obs = from0to1,
pyrs = pyrs,
#print = year,
adjust = c("year", "sex", "agegroup"),
weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k_symptoms_signs <- mapply(
dsr_format_corr, # FUN
r2_adj_symptoms_signs$rate.adj, # primer vector (rate)
r2_adj_symptoms_signs$SE.rate.adj, # segundo vector (se)
MoreArgs = list( # argumentos fijos extra
phi = extract_phi_dir(c_SISTRAT_c1_symptoms_signs),
factor = 1e3,
digits = 6,
conf = 0.95))
tasa_ponderada_diseases <- mx_1x1_comp_filt3b |>
ungroup() |>
left_join(
weights_df,
by = c("year", "agegroup", "sex")) |>
summarise(
haz_circulatory_w = sum(haz_circulatory * weights),
haz_digestive_w = sum(haz_digestive * weights),
haz_endocrine_metabolic_w = sum(haz_endocrine_metabolic * weights),
haz_infectious_parasitic_w = sum(haz_infectious_parasitic * weights),
haz_malignant_neoplasms_w = sum(haz_malignant_neoplasms * weights),
haz_mental_w = sum(haz_mental * weights),
haz_nervous_w = sum(haz_nervous * weights),
haz_other_causes_w = sum(haz_other_causes * weights),
haz_respiratory_w = sum(haz_respiratory * weights),
haz_symptoms_signs_w = sum(haz_symptoms_signs * weights)
) *1e3
cbind.data.frame(
rbind.data.frame(
cbind.data.frame(var="Circulatory System Diseases", t(r2_adj_circulatory[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_circulatory),
cbind.data.frame(var="Digestive System Diseases", t(r2_adj_digestive[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_digestive),
cbind.data.frame(var="Endocrine and Metabolic Diseases", t(r2_adj_endocrine_metabolic[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_endocrine_metabolic),
cbind.data.frame(var="Infectious and Parasitic Diseases", t(r2_adj_infectious_parasitic[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_infectious_parasitic),
cbind.data.frame(var="Malignant neoplasms", t(r2_adj_malignant_neoplasms[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_malignant_neoplasms),
cbind.data.frame(var="Mental and behavioral", t(r2_adj_mental[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_mental),
cbind.data.frame(var="Nervous system", t(r2_adj_nervous[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_nervous),
cbind.data.frame(var="Other Causes", t(r2_adj_other_causes[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other_causes),
cbind.data.frame(var="Respiratory System Diseases", t(r2_adj_respiratory[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_respiratory),
cbind.data.frame(var="Symptoms, Signs and Abnormal Findings", t(r2_adj_symptoms_signs[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_symptoms_signs)
), perc= t(round(tasa_ponderada_diseases,1)))|>
(\(df) {
rownames(df) <- NULL
df->> df_smr_dir_diseases
df
})()|>
dplyr::mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|>
dplyr::mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
dplyr::select(-any_of(2:7))|>
tidyr::extract(
SMR_dir,
into = c("est", "low", "high"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>
dplyr::select(-est, -low, -high)|>
rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|>
dplyr::mutate(key = map[var],
ord = match(key, order_levels))|>
dplyr::arrange(ord)|>
dplyr::select(-key, -ord)|>
knitr::kable("markdown", caption= "Disease-specific SMRs, direct method, underlying")| var | perc | CMR | DSR | DSR (SEs robust to dispersion) |
|---|---|---|---|---|
| Infectious and Parasitic Diseases | 0.1 | 0.4 (0.3–0.4) | 0.4 (0.3–0.7) | 0.4 (0.3–0.6) |
| Malignant neoplasms | 0.9 | 0.6 (0.5–0.7) | 1.2 (0.9–1.6) | 1.2 (0.9–1.6) |
| Endocrine and Metabolic Diseases | 0.1 | 0.1 (0.1–0.2) | 0.1 (0.1–0.2) | 0.1 (0.1–0.2) |
| Mental and behavioral | 0.0 | 0.1 (0.1–0.2) | 0.1 (0.1–0.2) | 0.1 (0.1–0.1) |
| Nervous system | 0.1 | 0.1 (0.1–0.2) | 0.3 (0.1–0.7) | 0.3 (0.1–0.6) |
| Circulatory System Diseases | 0.6 | 1.2 (1.0–1.3) | 1.7 (1.2–2.5) | 1.8 (1.3–2.4) |
| Respiratory System Diseases | 0.2 | 0.5 (0.4–0.6) | 0.5 (0.4–0.7) | 0.5 (0.4–0.7) |
| Digestive System Diseases | 0.4 | 2.0 (1.8–2.1) | 2.9 (2.3–3.7) | 2.9 (2.2–3.8) |
| Symptoms, Signs and Abnormal Findings | 0.1 | 0.3 (0.3–0.4) | 0.6 (0.3–1.0) | 0.6 (0.3–0.9) |
| Other Causes | 0.2 | 0.1 (0.1–0.2) | 0.2 (0.1–0.2) | 0.2 (0.1–0.2) |
Code
sep_ind_ext<-
tibble::tibble(type= "Indirect, main", raw = df_smr_ind_ext$SMR) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_ind_ext <- bind_cols(Characteristic=df_smr_ind_ext$Characteristic, sep_ind_ext)
sep_dir_ext<-
tibble(type= "Direct, main", raw = df_smr_dir_ext$SMR_dir) %>%
extract(
raw,
into = c("estimate", "lower", "upper"),
regex = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
convert = TRUE # convierte a numérico
)|>
dplyr::select(type, estimate, lower, upper)
sep_dir_ext <- bind_cols(Characteristic=sep_dir_ext$var, sep_ind_ext)Warning: Unknown or uninitialised column: var.
Code
variances_ind_ext <- ((log(sep_ind_ext$upper[c(1,4)]) - log(sep_ind_ext$estimate[c(1,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe <- rma(yi = log(sep_ind_ext$estimate[c(1,4)]), sei = sqrt(variances_ind_ext), method = "FE")
variances_ind_ext2 <- ((log(sep_ind_ext$upper[c(2,4)]) - log(sep_ind_ext$estimate[c(2,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe2 <- rma(yi = log(sep_ind_ext$estimate[c(2,4)]), sei = sqrt(variances_ind_ext2), method = "FE")
variances_ind_ext3 <- ((log(sep_ind_ext$upper[c(3,4)]) - log(sep_ind_ext$estimate[c(3,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe3 <- rma(yi = log(sep_ind_ext$estimate[c(3,4)]), sei = sqrt(variances_ind_ext3), method = "FE")
variances_ind_ext4 <- ((log(sep_ind_ext$upper[c(5,4)]) - log(sep_ind_ext$estimate[c(5,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe4 <- rma(yi = log(sep_ind_ext$estimate[c(5,4)]), sei = sqrt(variances_ind_ext4), method = "FE")
variances_dir_ext <- ((log(sep_dir_ext$upper[c(1,4)]) - log(sep_dir_ext$estimate[c(1,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe <- rma(yi = log(sep_dir_ext$estimate[c(1,4)]), sei = sqrt(variances_dir_ext), method = "FE")
variances_dir_ext2 <- ((log(sep_dir_ext$upper[c(2,4)]) - log(sep_dir_ext$estimate[c(2,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe2 <- rma(yi = log(sep_dir_ext$estimate[c(2,4)]), sei = sqrt(variances_dir_ext2), method = "FE")
variances_dir_ext3 <- ((log(sep_dir_ext$upper[c(3,4)]) - log(sep_dir_ext$estimate[c(3,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe3 <- rma(yi = log(sep_dir_ext$estimate[c(3,4)]), sei = sqrt(variances_dir_ext3), method = "FE")
variances_dir_ext4 <- ((log(sep_dir_ext$upper[c(5,4)]) - log(sep_dir_ext$estimate[c(5,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe4 <- rma(yi = log(sep_dir_ext$estimate[c(5,4)]), sei = sqrt(variances_dir_ext4), method = "FE")
bind_rows(
cbind.data.frame(type= "", comp= "Assaults vs. No external causes", Q= meta_fe_ind_ext_fe$QE, p= meta_fe_ind_ext_fe$QEp, Q_b= meta_fe_dir_ext_fe$QE, p_b= meta_fe_dir_ext_fe$QEp),
cbind.data.frame(type= "", comp= "Intentional self-harm vs. No external causes", Q= meta_fe_ind_ext_fe2$QE, p= meta_fe_ind_ext_fe2$QEp, Q_b= meta_fe_dir_ext_fe2$QE, p_b= meta_fe_dir_ext_fe2$QEp),
cbind.data.frame(type= "", comp= "Other unintentional causes of injury vs. No external causes", Q= meta_fe_ind_ext_fe3$QE, p= meta_fe_ind_ext_fe3$QEp, Q_b= meta_fe_dir_ext_fe3$QE, p_b= meta_fe_dir_ext_fe3$QEp),
cbind.data.frame(type= "", comp= "Transport accidents vs. No external causes", Q= meta_fe_ind_ext_fe4$QE, p= meta_fe_ind_ext_fe4$QEp, Q_b= meta_fe_dir_ext_fe4$QE, p_b= meta_fe_dir_ext_fe4$QEp)
)|>
mutate(
Qa_SMR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
),
Qa_DSR = case_when(
str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
TRUE ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
)
) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR) |>
knitr::kable("markdown", caption= "Heterogeneity, external causes")| type | comp | Qa_SMR | Qa_DSR |
|---|---|---|---|
| Assaults vs. No external causes | Q 11.56 (df=1), p=0.001 | Q 11.56 (df=1), p=0.001 | |
| Intentional self-harm vs. No external causes | Q 108.12 (df=1), p=0.000 | Q 108.12 (df=1), p=0.000 | |
| Other unintentional causes of injury vs. No external causes | Q 22.35 (df=1), p=0.000 | Q 22.35 (df=1), p=0.000 | |
| Transport accidents vs. No external causes | Q 9.78 (df=1), p=0.002 | Q 9.78 (df=1), p=0.002 |
To close the project, we erase polars objects.
Code
rm(list = ls()[grepl("_pl$", ls())])Session info
Code
#|echo: true
#|error: true
#|message: true
#|paged.print: true
message(paste0("R library: ", Sys.getenv("R_LIBS_USER")))Code
message(paste0("Date: ",withr::with_locale(new = c('LC_TIME' = 'C'), code =Sys.time())))Code
message(paste0("Editor context: ", path))Code
cat("quarto version: "); quarto::quarto_version()quarto version:
[1] '1.7.29'
Code
sesion_info <- devtools::session_info()Warning in system2(“quarto”, “-V”, stdout = TRUE, env = paste0(“TMPDIR=”, : el comando ejecutado ‘“quarto” TMPDIR=C:/Users/andre/AppData/Local/Temp/RtmpUdCeX0/filec2083f0f34d4 -V’ tiene el estatus 1
Code
dplyr::select(
tibble::as_tibble(sesion_info$packages),
c(package, loadedversion, source)
) %>%
DT::datatable(filter = 'top', colnames = c('Row number' =1,'Package' = 2, 'Version'= 3),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
'', htmltools::em('R packages')),
options=list(
initComplete = htmlwidgets::JS(
"function(settings, json) {",
"$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '70%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",
"}")))Code
#|echo: true
#|error: true
#|message: true
#|paged.print: true
#|class-output: center-table
#|eval: false
cat("Python version\n")Python version
Code
reticulate::py_config()python: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe
libpython: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python311.dll
pythonhome: G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311
version: 3.11.5 | packaged by conda-forge | (main, Aug 27 2023, 03:23:48) [MSC v.1936 64 bit (AMD64)]
Architecture: 64bit
numpy: [NOT FOUND]
NOTE: Python version was forced by RETICULATE_PYTHON
Code
reticulate::py_list_packages() %>%
DT::datatable(filter = 'top', colnames = c('Row number' =1,'Package' = 2, 'Version'= 3),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
'', htmltools::em('Python packages')),
options=list(
initComplete = htmlwidgets::JS(
"function(settings, json) {",
"$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '70%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",
"}"))) Warning in system2(python, args, stdout = TRUE): el comando ejecutado ‘“G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe” -m pip freeze’ tiene el estatus 1
Save
Code
wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
paste0(getwd(),"/cons")
file.path(paste0(wdpath,"data/20241015_out"))
file.path(paste0(envpath,"data/20241015_out"))
# Save
rdata_path <- file.path(wdpath, "data/20241015_out", paste0("mort_", format(Sys.time(), "%Y_%m_%d"), ".Rdata"))
save.image(rdata_path)
cat("Saved in:",
rdata_path)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
password <- Sys.getenv("PASSWORD_SECRET")
} else {
if (interactive()) {
utils::savehistory(tempfile())
Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
utils::loadhistory()
}
Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
save.image(paste0(rdata_path,".enc"))
# Encriptar el archivo en el mismo lugar
httr2::secret_encrypt_file(path = paste0(rdata_path,".enc"), key = "PASSWORD_SECRET")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Copy renv lock into cons folder\n")
if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
message("Running on RStudio Server or inside Docker. Folder copy skipped.")
} else {
source_folder <-
destination_folder <- paste0(wdpath,"cons/renv")
# Copy the folder recursively
file.copy(paste0(wdpath,"renv.lock"), paste0(wdpath,"cons/renv.lock"), overwrite = TRUE)
message("Renv lock copy performed.")
}Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
time_after_dedup2<-Sys.time()
paste0("Time in markdown: ");time_after_dedup2-time_before_dedup2[1] "G:/My Drive/Alvacast/SISTRAT 2023/cons/cons"
[1] "G:/My Drive/Alvacast/SISTRAT 2023//data/20241015_out"
[1] "G:/Mi unidad/Alvacast/SISTRAT 2023/data/20241015_out"
Saved in: G:/My Drive/Alvacast/SISTRAT 2023///data/20241015_out/mort_2025_08_21.RdataCopy renv lock into cons folder
[1] "Time in markdown: "
Time difference of 76.19106 days